diff --git a/README.md b/README.md index 0d13237..1df11db 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ To facilitate the dual-license model, contributions to the project can only be a * SV interface for segmented RAM * SV interface for DMA descriptors * AXI central DMA + * AXI streaming DMA * Segmented SDP RAM * Segmented dual-clock SDP RAM * Ethernet diff --git a/src/dma/rtl/taxi_axi_dma.f b/src/dma/rtl/taxi_axi_dma.f new file mode 100644 index 0000000..7413899 --- /dev/null +++ b/src/dma/rtl/taxi_axi_dma.f @@ -0,0 +1,6 @@ +taxi_axi_dma.sv +taxi_axi_dma_wr.sv +taxi_axi_dma_rd.sv +taxi_dma_desc_if.sv +../lib/taxi/src/axis/rtl/taxi_axis_if.sv +../lib/taxi/src/axi/rtl/taxi_axi_if.sv diff --git a/src/dma/rtl/taxi_axi_dma.sv b/src/dma/rtl/taxi_axi_dma.sv new file mode 100644 index 0000000..c0b89a8 --- /dev/null +++ b/src/dma/rtl/taxi_axi_dma.sv @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2018-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 DMA + */ +module taxi_axi_dma # +( + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 16, + // Enable support for unaligned transfers + parameter logic UNALIGNED_EN = 1'b1 +) +( + input wire logic clk, + input wire logic rst, + + /* + * DMA read descriptor + */ + taxi_dma_desc_if.req_snk rd_desc_req, + taxi_dma_desc_if.sts_src rd_desc_sts, + + /* + * DMA write descriptor + */ + taxi_dma_desc_if.req_snk wr_desc_req, + taxi_dma_desc_if.sts_src wr_desc_sts, + + /* + * AXI stream read data output + */ + taxi_axis_if.src m_axis_rd_data, + + /* + * AXI stream write data input + */ + taxi_axis_if.snk s_axis_wr_data, + + /* + * AXI4 master interface + */ + taxi_axi_if.wr_mst m_axi_wr, + taxi_axi_if.rd_mst m_axi_rd, + + /* + * Configuration + */ + input wire logic read_enable, + input wire logic write_enable, + input wire logic write_abort +); + +taxi_axi_dma_rd #( + 
.AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN), + .UNALIGNED_EN(UNALIGNED_EN) +) +axi_dma_rd_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA read descriptor + */ + .rd_desc_req(rd_desc_req), + .rd_desc_sts(rd_desc_sts), + + /* + * AXI stream read data output + */ + .m_axis_rd_data(m_axis_rd_data), + + /* + * AXI4 master interface + */ + .m_axi_rd(m_axi_rd), + + /* + * Configuration + */ + .enable(read_enable) +); + +taxi_axi_dma_wr #( + .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN), + .UNALIGNED_EN(UNALIGNED_EN) +) +axi_dma_wr_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA write descriptor + */ + .wr_desc_req(wr_desc_req), + .wr_desc_sts(wr_desc_sts), + + /* + * AXI stream write data input + */ + .s_axis_wr_data(s_axis_wr_data), + + /* + * AXI4 master interface + */ + .m_axi_wr(m_axi_wr), + + /* + * Configuration + */ + .enable(write_enable), + .abort(write_abort) +); + +endmodule + +`resetall diff --git a/src/dma/rtl/taxi_axi_dma_rd.sv b/src/dma/rtl/taxi_axi_dma_rd.sv new file mode 100644 index 0000000..51fa293 --- /dev/null +++ b/src/dma/rtl/taxi_axi_dma_rd.sv @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2018-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 DMA + */ +module taxi_axi_dma_rd # +( + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 16, + // Enable support for unaligned transfers + parameter logic UNALIGNED_EN = 1'b1 +) +( + input wire logic clk, + input wire logic rst, + + /* + * DMA read descriptor + */ + taxi_dma_desc_if.req_snk rd_desc_req, + taxi_dma_desc_if.sts_src rd_desc_sts, + + /* + * AXI stream read data output + */ + taxi_axis_if.src m_axis_rd_data, + + /* + * AXI4 master interface + */ + taxi_axi_if.rd_mst m_axi_rd, + + /* + * Configuration + */ + input wire logic enable +); + +// extract parameters +localparam AXI_DATA_W = m_axi_rd.DATA_W; +localparam AXI_ADDR_W = m_axi_rd.ADDR_W; +localparam AXI_STRB_W = 
m_axi_rd.STRB_W; +localparam AXI_ID_W = m_axi_rd.ID_W; +localparam AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_rd.MAX_BURST_LEN ? AXI_MAX_BURST_LEN : m_axi_rd.MAX_BURST_LEN; + +localparam LEN_W = rd_desc_req.LEN_W; +localparam TAG_W = rd_desc_req.TAG_W; + +localparam AXIS_DATA_W = m_axis_rd_data.DATA_W; +localparam AXIS_KEEP_EN = m_axis_rd_data.KEEP_EN; +localparam AXIS_KEEP_W = m_axis_rd_data.KEEP_W; +localparam AXIS_LAST_EN = m_axis_rd_data.LAST_EN; +localparam AXIS_ID_EN = m_axis_rd_data.ID_EN; +localparam AXIS_ID_W = m_axis_rd_data.ID_W; +localparam AXIS_DEST_EN = m_axis_rd_data.DEST_EN; +localparam AXIS_DEST_W = m_axis_rd_data.DEST_W; +localparam AXIS_USER_EN = m_axis_rd_data.USER_EN; +localparam AXIS_USER_W = m_axis_rd_data.USER_W; + +localparam AXI_BYTE_LANES = AXI_STRB_W; +localparam AXI_BYTE_SIZE = AXI_DATA_W/AXI_BYTE_LANES; +localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W); +localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN_INT << AXI_BURST_SIZE; /* bytes in one maximum-length burst; uses the value clamped to the interface MAX_BURST_LEN so generated bursts never exceed what m_axi_rd allows */ + +localparam AXIS_KEEP_W_INT = AXIS_KEEP_EN ? AXIS_KEEP_W : 1; +localparam AXIS_BYTE_LANES = AXIS_KEEP_W_INT; +localparam AXIS_BYTE_SIZE = AXIS_DATA_W/AXIS_BYTE_LANES; + +localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1; +localparam OFFSET_MASK = AXI_STRB_W > 1 ? 
{OFFSET_W{1'b1}} : 0; +localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W); +localparam CYCLE_COUNT_W = LEN_W - AXI_BURST_SIZE + 1; /* the input and output cycle counters are loaded from the full descriptor length (req_len), not from a single burst, so they must be sized from LEN_W; a fixed 13-bit byte range truncates long transfers */ + +localparam OUTPUT_FIFO_AW = 5; + +// check configuration +if (AXI_BYTE_SIZE * AXI_STRB_W != AXI_DATA_W) + $fatal(0, "Error: AXI data width not evenly divisible (instance %m)"); + +if (AXIS_BYTE_SIZE * AXIS_KEEP_W_INT != AXIS_DATA_W) + $fatal(0, "Error: AXI stream data width not evenly divisible (instance %m)"); + +if (AXI_BYTE_SIZE != AXIS_BYTE_SIZE) + $fatal(0, "Error: word size mismatch (instance %m)"); + +if (2**$clog2(AXI_BYTE_LANES) != AXI_BYTE_LANES) + $fatal(0, "Error: AXI word width must be even power of two (instance %m)"); + +if (AXI_DATA_W != AXIS_DATA_W) + $fatal(0, "Error: AXI interface width must match AXI stream interface width (instance %m)"); + +if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256) + $fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)"); + +if (rd_desc_req.SRC_ADDR_W < AXI_ADDR_W) + $fatal(0, "Error: Descriptor address width is not sufficient (instance %m)"); + +localparam logic [1:0] + AXI_RESP_OKAY = 2'b00, + AXI_RESP_EXOKAY = 2'b01, + AXI_RESP_SLVERR = 2'b10, + AXI_RESP_DECERR = 2'b11; + +localparam logic [3:0] + DMA_ERROR_NONE = 4'd0, + DMA_ERROR_TIMEOUT = 4'd1, + DMA_ERROR_PARITY = 4'd2, + DMA_ERROR_AXI_RD_SLVERR = 4'd4, + DMA_ERROR_AXI_RD_DECERR = 4'd5, + DMA_ERROR_AXI_WR_SLVERR = 4'd6, + DMA_ERROR_AXI_WR_DECERR = 4'd7, + DMA_ERROR_PCIE_FLR = 4'd8, + DMA_ERROR_PCIE_CPL_POISONED = 4'd9, + DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10, + DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11; + +localparam logic [0:0] + AXI_STATE_IDLE = 1'd0, + AXI_STATE_START = 1'd1; + +logic [0:0] axi_state_reg = AXI_STATE_IDLE, axi_state_next; + +localparam logic [0:0] + AXIS_STATE_IDLE = 1'd0, + AXIS_STATE_READ = 1'd1; + +logic [0:0] axis_state_reg = AXIS_STATE_IDLE, axis_state_next; + +// datapath control signals +logic transfer_in_save; +logic axis_cmd_ready; + +logic [AXI_ADDR_W-1:0] addr_reg = '0, 
addr_next; +logic [LEN_W-1:0] op_count_reg = '0, op_count_next; +logic [12:0] tr_count_reg = '0, tr_count_next; + +logic [OFFSET_W-1:0] axis_cmd_offset_reg = '0, axis_cmd_offset_next; +logic [OFFSET_W-1:0] axis_cmd_last_cycle_offset_reg = '0, axis_cmd_last_cycle_offset_next; +logic [CYCLE_COUNT_W-1:0] axis_cmd_input_cycle_count_reg = '0, axis_cmd_input_cycle_count_next; +logic [CYCLE_COUNT_W-1:0] axis_cmd_output_cycle_count_reg = '0, axis_cmd_output_cycle_count_next; +logic axis_cmd_bubble_cycle_reg = 1'b0, axis_cmd_bubble_cycle_next; +logic [TAG_W-1:0] axis_cmd_tag_reg = '0, axis_cmd_tag_next; +logic [AXIS_ID_W-1:0] axis_cmd_axis_id_reg = '0, axis_cmd_axis_id_next; +logic [AXIS_DEST_W-1:0] axis_cmd_axis_dest_reg = '0, axis_cmd_axis_dest_next; +logic [AXIS_USER_W-1:0] axis_cmd_axis_user_reg = '0, axis_cmd_axis_user_next; +logic axis_cmd_valid_reg = 1'b0, axis_cmd_valid_next; + +logic [OFFSET_W-1:0] offset_reg = '0, offset_next; +logic [OFFSET_W-1:0] last_cycle_offset_reg = '0, last_cycle_offset_next; +logic [CYCLE_COUNT_W-1:0] input_cycle_count_reg = '0, input_cycle_count_next; +logic [CYCLE_COUNT_W-1:0] output_cycle_count_reg = '0, output_cycle_count_next; +logic input_active_reg = 1'b0, input_active_next; +logic output_active_reg = 1'b0, output_active_next; +logic bubble_cycle_reg = 1'b0, bubble_cycle_next; +logic first_cycle_reg = 1'b0, first_cycle_next; +logic output_last_cycle_reg = 1'b0, output_last_cycle_next; +logic [1:0] rresp_reg = AXI_RESP_OKAY, rresp_next; + +logic [TAG_W-1:0] tag_reg = '0, tag_next; +logic [AXIS_ID_W-1:0] axis_id_reg = '0, axis_id_next; +logic [AXIS_DEST_W-1:0] axis_dest_reg = '0, axis_dest_next; +logic [AXIS_USER_W-1:0] axis_user_reg = '0, axis_user_next; + +logic rd_desc_req_ready_reg = 1'b0, rd_desc_req_ready_next; + +logic [TAG_W-1:0] rd_desc_sts_tag_reg = '0, rd_desc_sts_tag_next; +logic [3:0] rd_desc_sts_error_reg = 4'd0, rd_desc_sts_error_next; +logic rd_desc_sts_valid_reg = 1'b0, rd_desc_sts_valid_next; + +logic 
[AXI_ADDR_W-1:0] m_axi_araddr_reg = '0, m_axi_araddr_next; +logic [7:0] m_axi_arlen_reg = 8'd0, m_axi_arlen_next; +logic m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next; +logic m_axi_rready_reg = 1'b0, m_axi_rready_next; + +logic [AXI_DATA_W-1:0] save_axi_rdata_reg = '0; + +wire [AXI_DATA_W*2-1:0] axi_rdata_full = {m_axi_rd.rdata, save_axi_rdata_reg}; +wire [AXI_DATA_W-1:0] shift_axi_rdata = axi_rdata_full[(OFFSET_W+1)'(AXI_STRB_W-offset_reg)*AXI_BYTE_SIZE +: AXI_DATA_W]; + +// internal datapath +logic [AXIS_DATA_W-1:0] m_axis_rd_data_tdata_int; +logic [AXIS_KEEP_W-1:0] m_axis_rd_data_tkeep_int; +logic m_axis_rd_data_tvalid_int; +wire m_axis_rd_data_tready_int; +logic m_axis_rd_data_tlast_int; +logic [AXIS_ID_W-1:0] m_axis_rd_data_tid_int; +logic [AXIS_DEST_W-1:0] m_axis_rd_data_tdest_int; +logic [AXIS_USER_W-1:0] m_axis_rd_data_tuser_int; + +assign rd_desc_req.req_ready = rd_desc_req_ready_reg; + +assign rd_desc_sts.sts_len = '0; +assign rd_desc_sts.sts_tag = rd_desc_sts_tag_reg; +assign rd_desc_sts.sts_id = '0; +assign rd_desc_sts.sts_dest = '0; +assign rd_desc_sts.sts_user = '0; +assign rd_desc_sts.sts_error = rd_desc_sts_error_reg; +assign rd_desc_sts.sts_valid = rd_desc_sts_valid_reg; + +assign m_axi_rd.arid = '0; +assign m_axi_rd.araddr = m_axi_araddr_reg; +assign m_axi_rd.arlen = m_axi_arlen_reg; +assign m_axi_rd.arsize = 3'(AXI_BURST_SIZE); +assign m_axi_rd.arburst = 2'b01; +assign m_axi_rd.arlock = 1'b0; +assign m_axi_rd.arcache = 4'b0011; +assign m_axi_rd.arprot = 3'b010; +assign m_axi_rd.arvalid = m_axi_arvalid_reg; +assign m_axi_rd.rready = m_axi_rready_reg; + +always_comb begin + axi_state_next = AXI_STATE_IDLE; + + rd_desc_req_ready_next = 1'b0; + + m_axi_araddr_next = m_axi_araddr_reg; + m_axi_arlen_next = m_axi_arlen_reg; + m_axi_arvalid_next = m_axi_arvalid_reg && !m_axi_rd.arready; + + addr_next = addr_reg; + op_count_next = op_count_reg; + tr_count_next = tr_count_reg; + + axis_cmd_offset_next = axis_cmd_offset_reg; + axis_cmd_last_cycle_offset_next 
= axis_cmd_last_cycle_offset_reg; + axis_cmd_input_cycle_count_next = axis_cmd_input_cycle_count_reg; + axis_cmd_output_cycle_count_next = axis_cmd_output_cycle_count_reg; + axis_cmd_bubble_cycle_next = axis_cmd_bubble_cycle_reg; + axis_cmd_tag_next = axis_cmd_tag_reg; + axis_cmd_axis_id_next = axis_cmd_axis_id_reg; + axis_cmd_axis_dest_next = axis_cmd_axis_dest_reg; + axis_cmd_axis_user_next = axis_cmd_axis_user_reg; + axis_cmd_valid_next = axis_cmd_valid_reg && !axis_cmd_ready; + + case (axi_state_reg) + AXI_STATE_IDLE: begin + // idle state - load new descriptor to start operation + rd_desc_req_ready_next = !axis_cmd_valid_reg && enable; + + if (rd_desc_req.req_ready && rd_desc_req.req_valid) begin + if (UNALIGNED_EN) begin + addr_next = rd_desc_req.req_src_addr; + axis_cmd_offset_next = AXI_STRB_W > 1 ? OFFSET_W'(AXI_STRB_W) - OFFSET_W'(rd_desc_req.req_src_addr & OFFSET_MASK) : '0; + axis_cmd_bubble_cycle_next = axis_cmd_offset_next > 0; + axis_cmd_last_cycle_offset_next = OFFSET_W'(rd_desc_req.req_len & OFFSET_MASK); + end else begin + addr_next = rd_desc_req.req_src_addr & ADDR_MASK; + axis_cmd_offset_next = '0; + axis_cmd_bubble_cycle_next = 1'b0; + axis_cmd_last_cycle_offset_next = OFFSET_W'(rd_desc_req.req_len & OFFSET_MASK); + end + axis_cmd_tag_next = rd_desc_req.req_tag; + op_count_next = rd_desc_req.req_len; + + axis_cmd_axis_id_next = rd_desc_req.req_id; + axis_cmd_axis_dest_next = rd_desc_req.req_dest; + axis_cmd_axis_user_next = rd_desc_req.req_user; + + if (UNALIGNED_EN) begin + axis_cmd_input_cycle_count_next = CYCLE_COUNT_W'((op_count_next + LEN_W'(rd_desc_req.req_src_addr & OFFSET_MASK) - LEN_W'(1)) >> AXI_BURST_SIZE); + end else begin + axis_cmd_input_cycle_count_next = CYCLE_COUNT_W'((op_count_next - LEN_W'(1)) >> AXI_BURST_SIZE); + end + axis_cmd_output_cycle_count_next = CYCLE_COUNT_W'((op_count_next - LEN_W'(1)) >> AXI_BURST_SIZE); + + axis_cmd_valid_next = 1'b1; + + rd_desc_req_ready_next = 1'b0; + axi_state_next = AXI_STATE_START; + end 
else begin + axi_state_next = AXI_STATE_IDLE; + end + end + AXI_STATE_START: begin + // start state - initiate new AXI transfer + if (!m_axi_rd.arvalid) begin + if (op_count_reg <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin + // packet smaller than max burst size + if ((12'(addr_reg & 12'hfff) + 12'(op_count_reg & 12'hfff)) >> 12 != 0 || op_count_reg >> 12 != 0) begin + // crosses 4k boundary + tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff); + end else begin + // does not cross 4k boundary + tr_count_next = 13'(op_count_reg); + end + end else begin + // packet larger than max burst size + if ((12'(addr_reg & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin + // crosses 4k boundary + tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff); + end else begin + // does not cross 4k boundary + tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(addr_reg & OFFSET_MASK); + end + end + + m_axi_araddr_next = addr_reg; + if (UNALIGNED_EN) begin + m_axi_arlen_next = 8'((tr_count_next + 13'(addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE); + end else begin + m_axi_arlen_next = 8'((tr_count_next - 13'd1) >> AXI_BURST_SIZE); + end + m_axi_arvalid_next = 1'b1; + + addr_next = addr_reg + AXI_ADDR_W'(tr_count_next); + op_count_next = op_count_reg - LEN_W'(tr_count_next); + + if (op_count_next > 0) begin + axi_state_next = AXI_STATE_START; + end else begin + rd_desc_req_ready_next = !axis_cmd_valid_reg && enable; + axi_state_next = AXI_STATE_IDLE; + end + end else begin + axi_state_next = AXI_STATE_START; + end + end + endcase +end + +always_comb begin + axis_state_next = AXIS_STATE_IDLE; + + rd_desc_sts_tag_next = rd_desc_sts_tag_reg; + rd_desc_sts_error_next = rd_desc_sts_error_reg; + rd_desc_sts_valid_next = 1'b0; + + m_axis_rd_data_tdata_int = shift_axi_rdata; + m_axis_rd_data_tkeep_int = '1; + m_axis_rd_data_tlast_int = 1'b0; + m_axis_rd_data_tvalid_int = 1'b0; + m_axis_rd_data_tid_int = axis_id_reg; + 
m_axis_rd_data_tdest_int = axis_dest_reg; + m_axis_rd_data_tuser_int = axis_user_reg; + + m_axi_rready_next = 1'b0; + + transfer_in_save = 1'b0; + axis_cmd_ready = 1'b0; + + offset_next = offset_reg; + last_cycle_offset_next = last_cycle_offset_reg; + input_cycle_count_next = input_cycle_count_reg; + output_cycle_count_next = output_cycle_count_reg; + input_active_next = input_active_reg; + output_active_next = output_active_reg; + bubble_cycle_next = bubble_cycle_reg; + first_cycle_next = first_cycle_reg; + output_last_cycle_next = output_last_cycle_reg; + + tag_next = tag_reg; + axis_id_next = axis_id_reg; + axis_dest_next = axis_dest_reg; + axis_user_next = axis_user_reg; + + if (m_axi_rd.rready && m_axi_rd.rvalid && (m_axi_rd.rresp == AXI_RESP_SLVERR || m_axi_rd.rresp == AXI_RESP_DECERR)) begin + rresp_next = m_axi_rd.rresp; + end else begin + rresp_next = rresp_reg; + end + + case (axis_state_reg) + AXIS_STATE_IDLE: begin + // idle state - load new descriptor to start operation + m_axi_rready_next = 1'b0; + + // store transfer parameters + if (UNALIGNED_EN) begin + offset_next = axis_cmd_offset_reg; + end else begin + offset_next = 0; + end + last_cycle_offset_next = axis_cmd_last_cycle_offset_reg; + input_cycle_count_next = axis_cmd_input_cycle_count_reg; + output_cycle_count_next = axis_cmd_output_cycle_count_reg; + bubble_cycle_next = axis_cmd_bubble_cycle_reg; + tag_next = axis_cmd_tag_reg; + axis_id_next = axis_cmd_axis_id_reg; + axis_dest_next = axis_cmd_axis_dest_reg; + axis_user_next = axis_cmd_axis_user_reg; + + output_last_cycle_next = output_cycle_count_next == 0; + input_active_next = 1'b1; + output_active_next = 1'b1; + first_cycle_next = 1'b1; + + if (axis_cmd_valid_reg) begin + axis_cmd_ready = 1'b1; + m_axi_rready_next = m_axis_rd_data_tready_int; + axis_state_next = AXIS_STATE_READ; + end + end + AXIS_STATE_READ: begin + // handle AXI read data + m_axi_rready_next = m_axis_rd_data_tready_int && input_active_reg; + + if ((m_axi_rd.rready && 
m_axi_rd.rvalid) || !input_active_reg) begin + // transfer in AXI read data + transfer_in_save = m_axi_rd.rready && m_axi_rd.rvalid; + + if (UNALIGNED_EN && first_cycle_reg && bubble_cycle_reg) begin + if (input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + end + bubble_cycle_next = 1'b0; + first_cycle_next = 1'b0; + + m_axi_rready_next = m_axis_rd_data_tready_int && input_active_next; + axis_state_next = AXIS_STATE_READ; + end else begin + // update counters + if (input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + end + if (output_active_reg) begin + output_cycle_count_next = output_cycle_count_reg - 1; + output_active_next = output_cycle_count_reg > 0; + end + output_last_cycle_next = output_cycle_count_next == 0; + bubble_cycle_next = 1'b0; + first_cycle_next = 1'b0; + + // pass through read data + m_axis_rd_data_tdata_int = shift_axi_rdata; + m_axis_rd_data_tkeep_int = '1; + m_axis_rd_data_tvalid_int = 1'b1; + + if (output_last_cycle_reg) begin + // no more data to transfer, finish operation + if (last_cycle_offset_reg > 0) begin + m_axis_rd_data_tkeep_int = {AXIS_KEEP_W_INT{1'b1}} >> ((OFFSET_W+1)'(AXIS_KEEP_W_INT) - last_cycle_offset_reg); + end + m_axis_rd_data_tlast_int = 1'b1; + + rd_desc_sts_tag_next = tag_reg; + if (rresp_next == AXI_RESP_SLVERR) begin + rd_desc_sts_error_next = DMA_ERROR_AXI_RD_SLVERR; + end else if (rresp_next == AXI_RESP_DECERR) begin + rd_desc_sts_error_next = DMA_ERROR_AXI_RD_DECERR; + end else begin + rd_desc_sts_error_next = DMA_ERROR_NONE; + end + rd_desc_sts_valid_next = 1'b1; + + rresp_next = AXI_RESP_OKAY; + + m_axi_rready_next = 1'b0; + axis_state_next = AXIS_STATE_IDLE; + end else begin + // more cycles in AXI transfer + m_axi_rready_next = m_axis_rd_data_tready_int && input_active_next; + axis_state_next = AXIS_STATE_READ; + end + end + end else begin + axis_state_next = 
AXIS_STATE_READ; + end + end + endcase +end + +always_ff @(posedge clk) begin + axi_state_reg <= axi_state_next; + axis_state_reg <= axis_state_next; + + rd_desc_req_ready_reg <= rd_desc_req_ready_next; + + rd_desc_sts_tag_reg <= rd_desc_sts_tag_next; + rd_desc_sts_error_reg <= rd_desc_sts_error_next; + rd_desc_sts_valid_reg <= rd_desc_sts_valid_next; + + m_axi_araddr_reg <= m_axi_araddr_next; + m_axi_arlen_reg <= m_axi_arlen_next; + m_axi_arvalid_reg <= m_axi_arvalid_next; + m_axi_rready_reg <= m_axi_rready_next; + + addr_reg <= addr_next; + op_count_reg <= op_count_next; + tr_count_reg <= tr_count_next; + + axis_cmd_offset_reg <= axis_cmd_offset_next; + axis_cmd_last_cycle_offset_reg <= axis_cmd_last_cycle_offset_next; + axis_cmd_input_cycle_count_reg <= axis_cmd_input_cycle_count_next; + axis_cmd_output_cycle_count_reg <= axis_cmd_output_cycle_count_next; + axis_cmd_bubble_cycle_reg <= axis_cmd_bubble_cycle_next; + axis_cmd_tag_reg <= axis_cmd_tag_next; + axis_cmd_axis_id_reg <= axis_cmd_axis_id_next; + axis_cmd_axis_dest_reg <= axis_cmd_axis_dest_next; + axis_cmd_axis_user_reg <= axis_cmd_axis_user_next; + axis_cmd_valid_reg <= axis_cmd_valid_next; + + offset_reg <= offset_next; + last_cycle_offset_reg <= last_cycle_offset_next; + input_cycle_count_reg <= input_cycle_count_next; + output_cycle_count_reg <= output_cycle_count_next; + input_active_reg <= input_active_next; + output_active_reg <= output_active_next; + bubble_cycle_reg <= bubble_cycle_next; + first_cycle_reg <= first_cycle_next; + output_last_cycle_reg <= output_last_cycle_next; + rresp_reg <= rresp_next; + + tag_reg <= tag_next; + axis_id_reg <= axis_id_next; + axis_dest_reg <= axis_dest_next; + axis_user_reg <= axis_user_next; + + if (transfer_in_save) begin + save_axi_rdata_reg <= m_axi_rd.rdata; + end + + if (rst) begin + axi_state_reg <= AXI_STATE_IDLE; + axis_state_reg <= AXIS_STATE_IDLE; + + axis_cmd_valid_reg <= 1'b0; + + rd_desc_req_ready_reg <= 1'b0; + + rd_desc_sts_valid_reg <= 1'b0; + 
m_axi_arvalid_reg <= 1'b0; + m_axi_rready_reg <= 1'b0; + + rresp_reg <= AXI_RESP_OKAY; + end +end + +// output datapath logic +logic [AXIS_DATA_W-1:0] m_axis_rd_data_tdata_reg = '0; +logic [AXIS_KEEP_W-1:0] m_axis_rd_data_tkeep_reg = '0; +logic m_axis_rd_data_tvalid_reg = 1'b0; +logic m_axis_rd_data_tlast_reg = 1'b0; +logic [AXIS_ID_W-1:0] m_axis_rd_data_tid_reg = '0; +logic [AXIS_DEST_W-1:0] m_axis_rd_data_tdest_reg = '0; +logic [AXIS_USER_W-1:0] m_axis_rd_data_tuser_reg = '0; + +logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = '0; +logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = '0; +logic out_fifo_half_full_reg = 1'b0; + +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXIS_DATA_W-1:0] out_fifo_tdata[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXIS_KEEP_W-1:0] out_fifo_tkeep[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic out_fifo_tlast[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXIS_ID_W-1:0] out_fifo_tid[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXIS_DEST_W-1:0] out_fifo_tdest[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXIS_USER_W-1:0] out_fifo_tuser[2**OUTPUT_FIFO_AW]; + +assign m_axis_rd_data_tready_int = !out_fifo_half_full_reg; + +assign m_axis_rd_data.tdata = m_axis_rd_data_tdata_reg; +assign m_axis_rd_data.tkeep = AXIS_KEEP_EN ? m_axis_rd_data_tkeep_reg : '1; +assign m_axis_rd_data.tstrb = m_axis_rd_data.tkeep; +assign m_axis_rd_data.tvalid = m_axis_rd_data_tvalid_reg; +assign m_axis_rd_data.tlast = AXIS_LAST_EN ? m_axis_rd_data_tlast_reg : 1'b1; +assign m_axis_rd_data.tid = AXIS_ID_EN ? 
m_axis_rd_data_tid_reg : '0; +assign m_axis_rd_data.tdest = AXIS_DEST_EN ? m_axis_rd_data_tdest_reg : '0; +assign m_axis_rd_data.tuser = AXIS_USER_EN ? m_axis_rd_data_tuser_reg : '0; + +always_ff @(posedge clk) begin + m_axis_rd_data_tvalid_reg <= m_axis_rd_data_tvalid_reg && !m_axis_rd_data.tready; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_AW-1); + + if (!out_fifo_full && m_axis_rd_data_tvalid_int) begin + out_fifo_tdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tdata_int; + out_fifo_tkeep[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tkeep_int; + out_fifo_tlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tlast_int; + out_fifo_tid[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tid_int; + out_fifo_tdest[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tdest_int; + out_fifo_tuser[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tuser_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!m_axis_rd_data_tvalid_reg || m_axis_rd_data.tready)) begin + m_axis_rd_data_tdata_reg <= out_fifo_tdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axis_rd_data_tkeep_reg <= out_fifo_tkeep[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axis_rd_data_tvalid_reg <= 1'b1; + m_axis_rd_data_tlast_reg <= out_fifo_tlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axis_rd_data_tid_reg <= out_fifo_tid[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axis_rd_data_tdest_reg <= out_fifo_tdest[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axis_rd_data_tuser_reg <= out_fifo_tuser[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + + if (rst) begin + out_fifo_wr_ptr_reg <= '0; + out_fifo_rd_ptr_reg <= '0; + m_axis_rd_data_tvalid_reg <= 1'b0; + end +end + +endmodule + +`resetall diff --git a/src/dma/rtl/taxi_axi_dma_wr.sv b/src/dma/rtl/taxi_axi_dma_wr.sv new file mode 100644 index 
0000000..1ec5d2e --- /dev/null +++ b/src/dma/rtl/taxi_axi_dma_wr.sv @@ -0,0 +1,872 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2018-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 DMA + */ +module taxi_axi_dma_wr # +( + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 16, + // Enable support for unaligned transfers + parameter logic UNALIGNED_EN = 1'b1 +) +( + input wire logic clk, + input wire logic rst, + + /* + * DMA write descriptor + */ + taxi_dma_desc_if.req_snk wr_desc_req, + taxi_dma_desc_if.sts_src wr_desc_sts, + + /* + * AXI stream write data input + */ + taxi_axis_if.snk s_axis_wr_data, + + /* + * AXI4 master interface + */ + taxi_axi_if.wr_mst m_axi_wr, + + /* + * Configuration + */ + input wire logic enable, + input wire logic abort +); + +// extract parameters +localparam AXI_DATA_W = m_axi_wr.DATA_W; +localparam AXI_ADDR_W = m_axi_wr.ADDR_W; +localparam AXI_STRB_W = m_axi_wr.STRB_W; +localparam AXI_ID_W = m_axi_wr.ID_W; +localparam AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_wr.MAX_BURST_LEN ? 
AXI_MAX_BURST_LEN : m_axi_wr.MAX_BURST_LEN; + +localparam LEN_W = wr_desc_req.LEN_W; +localparam TAG_W = wr_desc_req.TAG_W; + +localparam AXIS_DATA_W = s_axis_wr_data.DATA_W; +localparam AXIS_KEEP_EN = s_axis_wr_data.KEEP_EN; +localparam AXIS_KEEP_W = s_axis_wr_data.KEEP_W; +localparam AXIS_LAST_EN = s_axis_wr_data.LAST_EN; +localparam AXIS_ID_EN = s_axis_wr_data.ID_EN; +localparam AXIS_ID_W = s_axis_wr_data.ID_W; +localparam AXIS_DEST_EN = s_axis_wr_data.DEST_EN; +localparam AXIS_DEST_W = s_axis_wr_data.DEST_W; +localparam AXIS_USER_EN = s_axis_wr_data.USER_EN; +localparam AXIS_USER_W = s_axis_wr_data.USER_W; + +localparam AXI_BYTE_LANES = AXI_STRB_W; +localparam AXI_BYTE_SIZE = AXI_DATA_W/AXI_BYTE_LANES; +localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W); +localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN << AXI_BURST_SIZE; + +localparam AXIS_KEEP_W_INT = AXIS_KEEP_EN ? AXIS_KEEP_W : 1; +localparam AXIS_BYTE_LANES = AXIS_KEEP_W_INT; +localparam AXIS_BYTE_SIZE = AXIS_DATA_W/AXIS_BYTE_LANES; + +localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1; +localparam OFFSET_MASK = AXI_STRB_W > 1 ? 
{OFFSET_W{1'b1}} : 0; +localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W); +localparam CYCLE_COUNT_W = 13 - AXI_BURST_SIZE; + +localparam STATUS_FIFO_AW = 5; +localparam OUTPUT_FIFO_AW = 5; + +// check configuration +if (AXI_BYTE_SIZE * AXI_STRB_W != AXI_DATA_W) + $fatal(0, "Error: AXI data width not evenly divisible (instance %m)"); + +if (AXIS_BYTE_SIZE * AXIS_KEEP_W_INT != AXIS_DATA_W) + $fatal(0, "Error: AXI stream data width not evenly divisible (instance %m)"); + +if (AXI_BYTE_SIZE != AXIS_BYTE_SIZE) + $fatal(0, "Error: word size mismatch (instance %m)"); + +if (2**$clog2(AXI_BYTE_LANES) != AXI_BYTE_LANES) + $fatal(0, "Error: AXI word width must be even power of two (instance %m)"); + +if (AXI_DATA_W != AXIS_DATA_W) + $fatal(0, "Error: AXI interface width must match AXI stream interface width (instance %m)"); + +if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256) + $fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)"); + +if (wr_desc_req.DST_ADDR_W < AXI_ADDR_W) + $fatal(0, "Error: Descriptor address width is not sufficient (instance %m)"); + +localparam logic [1:0] + AXI_RESP_OKAY = 2'b00, + AXI_RESP_EXOKAY = 2'b01, + AXI_RESP_SLVERR = 2'b10, + AXI_RESP_DECERR = 2'b11; + +localparam logic [3:0] + DMA_ERROR_NONE = 4'd0, + DMA_ERROR_TIMEOUT = 4'd1, + DMA_ERROR_PARITY = 4'd2, + DMA_ERROR_AXI_RD_SLVERR = 4'd4, + DMA_ERROR_AXI_RD_DECERR = 4'd5, + DMA_ERROR_AXI_WR_SLVERR = 4'd6, + DMA_ERROR_AXI_WR_DECERR = 4'd7, + DMA_ERROR_PCIE_FLR = 4'd8, + DMA_ERROR_PCIE_CPL_POISONED = 4'd9, + DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10, + DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11; + +localparam logic [2:0] + STATE_IDLE = 3'd0, + STATE_START = 3'd1, + STATE_WRITE = 3'd2, + STATE_FINISH_BURST = 3'd3, + STATE_DROP_DATA = 3'd4; + +logic [2:0] state_reg = STATE_IDLE, state_next; + +// datapath control signals +logic transfer_in_save; +logic flush_save; +logic status_fifo_we; + +logic [OFFSET_W:0] cycle_size; + +logic [AXI_ADDR_W-1:0] addr_reg = '0, 
addr_next; +logic [LEN_W-1:0] op_count_reg = '0, op_count_next; +logic [12:0] tr_count_reg = '0, tr_count_next; + +logic [OFFSET_W-1:0] offset_reg = '0, offset_next; +logic [AXI_STRB_W-1:0] strb_offset_mask_reg = '1, strb_offset_mask_next; +logic zero_offset_reg = 1'b1, zero_offset_next; +logic [OFFSET_W-1:0] last_cycle_offset_reg = '0, last_cycle_offset_next; +logic [LEN_W-1:0] length_reg = '0, length_next; +logic [CYCLE_COUNT_W-1:0] input_cycle_count_reg = '0, input_cycle_count_next; +logic [CYCLE_COUNT_W-1:0] output_cycle_count_reg = '0, output_cycle_count_next; +logic input_active_reg = 1'b0, input_active_next; +logic first_cycle_reg = 1'b0, first_cycle_next; +logic input_last_cycle_reg = 1'b0, input_last_cycle_next; +logic output_last_cycle_reg = 1'b0, output_last_cycle_next; +logic last_transfer_reg = 1'b0, last_transfer_next; +logic [1:0] bresp_reg = AXI_RESP_OKAY, bresp_next; + +logic [TAG_W-1:0] tag_reg = '0, tag_next; +logic [AXIS_ID_W-1:0] axis_id_reg = '0, axis_id_next; +logic [AXIS_DEST_W-1:0] axis_dest_reg = '0, axis_dest_next; +logic [AXIS_USER_W-1:0] axis_user_reg = '0, axis_user_next; + +logic [STATUS_FIFO_AW+1-1:0] status_fifo_wr_ptr_reg = '0; +logic [STATUS_FIFO_AW+1-1:0] status_fifo_rd_ptr_reg = '0, status_fifo_rd_ptr_next; +logic [LEN_W-1:0] status_fifo_len[2**STATUS_FIFO_AW]; +logic [TAG_W-1:0] status_fifo_tag[2**STATUS_FIFO_AW]; +logic [AXIS_ID_W-1:0] status_fifo_id[2**STATUS_FIFO_AW]; +logic [AXIS_DEST_W-1:0] status_fifo_dest[2**STATUS_FIFO_AW]; +logic [AXIS_USER_W-1:0] status_fifo_user[2**STATUS_FIFO_AW]; +logic status_fifo_last[2**STATUS_FIFO_AW]; +logic [LEN_W-1:0] status_fifo_wr_len; +logic [TAG_W-1:0] status_fifo_wr_tag; +logic [AXIS_ID_W-1:0] status_fifo_wr_id; +logic [AXIS_DEST_W-1:0] status_fifo_wr_dest; +logic [AXIS_USER_W-1:0] status_fifo_wr_user; +logic status_fifo_wr_last; + +logic [STATUS_FIFO_AW+1-1:0] active_count_reg = 0; +logic active_count_av_reg = 1'b1; +logic inc_active; +logic dec_active; + +logic wr_desc_req_ready_reg = 
1'b0, wr_desc_req_ready_next; + +logic [LEN_W-1:0] wr_desc_sts_len_reg = '0, wr_desc_sts_len_next; +logic [TAG_W-1:0] wr_desc_sts_tag_reg = '0, wr_desc_sts_tag_next; +logic [AXIS_ID_W-1:0] wr_desc_sts_id_reg = '0, wr_desc_sts_id_next; +logic [AXIS_DEST_W-1:0] wr_desc_sts_dest_reg = '0, wr_desc_sts_dest_next; +logic [AXIS_USER_W-1:0] wr_desc_sts_user_reg = '0, wr_desc_sts_user_next; +logic [3:0] wr_desc_sts_error_reg = 4'd0, wr_desc_sts_error_next; +logic wr_desc_sts_valid_reg = 1'b0, wr_desc_sts_valid_next; + +logic [AXI_ADDR_W-1:0] m_axi_awaddr_reg = '0, m_axi_awaddr_next; +logic [7:0] m_axi_awlen_reg = 8'd0, m_axi_awlen_next; +logic m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next; +logic m_axi_bready_reg = 1'b0, m_axi_bready_next; + +logic s_axis_wr_data_tready_reg = 1'b0, s_axis_wr_data_tready_next; + +logic [AXIS_DATA_W-1:0] save_axis_tdata_reg = '0; +logic [AXIS_KEEP_W_INT-1:0] save_axis_tkeep_reg = '0; +logic save_axis_tlast_reg = 1'b0; + +logic [AXIS_DATA_W-1:0] shift_axis_tdata; +logic [AXIS_KEEP_W_INT-1:0] shift_axis_tkeep; +logic shift_axis_tvalid; +logic shift_axis_tlast; +logic shift_axis_input_tready; +logic shift_axis_extra_cycle_reg = 1'b0; + +// internal datapath +logic [AXI_DATA_W-1:0] m_axi_wdata_int; +logic [AXI_STRB_W-1:0] m_axi_wstrb_int; +logic m_axi_wlast_int; +logic m_axi_wvalid_int; +wire m_axi_wready_int; + +assign wr_desc_req.req_ready = wr_desc_req_ready_reg; + +assign wr_desc_sts.sts_len = wr_desc_sts_len_reg; +assign wr_desc_sts.sts_tag = wr_desc_sts_tag_reg; +assign wr_desc_sts.sts_id = wr_desc_sts_id_reg; +assign wr_desc_sts.sts_dest = wr_desc_sts_dest_reg; +assign wr_desc_sts.sts_user = wr_desc_sts_user_reg; +assign wr_desc_sts.sts_error = wr_desc_sts_error_reg; +assign wr_desc_sts.sts_valid = wr_desc_sts_valid_reg; + +assign s_axis_wr_data.tready = s_axis_wr_data_tready_reg; + +assign m_axi_wr.awid = '0; +assign m_axi_wr.awaddr = m_axi_awaddr_reg; +assign m_axi_wr.awlen = m_axi_awlen_reg; +assign m_axi_wr.awsize = 3'(AXI_BURST_SIZE); 
+assign m_axi_wr.awburst = 2'b01; +assign m_axi_wr.awlock = 1'b0; +assign m_axi_wr.awcache = 4'b0011; +assign m_axi_wr.awprot = 3'b010; +assign m_axi_wr.awvalid = m_axi_awvalid_reg; +assign m_axi_wr.bready = m_axi_bready_reg; + +if (!UNALIGNED_EN || AXI_STRB_W == 1) begin : shift + always_comb begin + shift_axis_tdata = s_axis_wr_data.tdata; + shift_axis_tkeep = s_axis_wr_data.tkeep; + shift_axis_tvalid = s_axis_wr_data.tvalid; + shift_axis_tlast = AXIS_LAST_EN && s_axis_wr_data.tlast; + shift_axis_input_tready = 1'b1; + end +end else begin : shift + wire [AXIS_DATA_W*2-1:0] tdata_full = {s_axis_wr_data.tdata, save_axis_tdata_reg}; + wire [AXIS_KEEP_W*2-1:0] tkeep_full = {s_axis_wr_data.tkeep, save_axis_tkeep_reg}; + wire [AXIS_KEEP_W*2-1:0] tkeep_mask = {{AXIS_KEEP_W_INT{1'b0}}, save_axis_tkeep_reg}; + + always_comb begin + if (zero_offset_reg) begin + // passthrough if no overlap + shift_axis_tdata = s_axis_wr_data.tdata; + shift_axis_tkeep = s_axis_wr_data.tkeep; + shift_axis_tvalid = s_axis_wr_data.tvalid; + shift_axis_tlast = AXIS_LAST_EN && s_axis_wr_data.tlast; + shift_axis_input_tready = 1'b1; + end else if (!AXIS_LAST_EN) begin + shift_axis_tdata = tdata_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg)*AXIS_BYTE_SIZE +: AXIS_DATA_W]; + shift_axis_tkeep = tkeep_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg) +: AXIS_KEEP_W]; + shift_axis_tvalid = s_axis_wr_data.tvalid; + shift_axis_tlast = 1'b0; + shift_axis_input_tready = 1'b1; + end else if (shift_axis_extra_cycle_reg) begin + shift_axis_tdata = tdata_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg)*AXIS_BYTE_SIZE +: AXIS_DATA_W]; + shift_axis_tkeep = tkeep_mask[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg) +: AXIS_KEEP_W]; + shift_axis_tvalid = 1'b1; + shift_axis_tlast = save_axis_tlast_reg; + shift_axis_input_tready = flush_save; + end else begin + shift_axis_tdata = tdata_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg)*AXIS_BYTE_SIZE +: AXIS_DATA_W]; + shift_axis_tkeep = 
tkeep_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg) +: AXIS_KEEP_W]; + shift_axis_tvalid = s_axis_wr_data.tvalid; + shift_axis_tlast = (s_axis_wr_data.tlast && ((s_axis_wr_data.tkeep & ({AXIS_KEEP_W_INT{1'b1}} << ((OFFSET_W+1)'(AXIS_KEEP_W_INT)-offset_reg))) == 0)); + shift_axis_input_tready = !(s_axis_wr_data.tlast && s_axis_wr_data.tready && s_axis_wr_data.tvalid); + end + end +end + +always_comb begin + state_next = STATE_IDLE; + + wr_desc_req_ready_next = 1'b0; + + wr_desc_sts_len_next = wr_desc_sts_len_reg; + wr_desc_sts_tag_next = wr_desc_sts_tag_reg; + wr_desc_sts_id_next = wr_desc_sts_id_reg; + wr_desc_sts_dest_next = wr_desc_sts_dest_reg; + wr_desc_sts_user_next = wr_desc_sts_user_reg; + wr_desc_sts_error_next = wr_desc_sts_error_reg; + wr_desc_sts_valid_next = 1'b0; + + s_axis_wr_data_tready_next = 1'b0; + + m_axi_awaddr_next = m_axi_awaddr_reg; + m_axi_awlen_next = m_axi_awlen_reg; + m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_wr.awready; + m_axi_wdata_int = shift_axis_tdata; + m_axi_wstrb_int = shift_axis_tkeep; + m_axi_wlast_int = 1'b0; + m_axi_wvalid_int = 1'b0; + m_axi_bready_next = 1'b0; + + transfer_in_save = 1'b0; + flush_save = 1'b0; + status_fifo_we = 1'b0; + + cycle_size = (OFFSET_W+1)'(AXIS_KEEP_W_INT); + + addr_next = addr_reg; + offset_next = offset_reg; + strb_offset_mask_next = strb_offset_mask_reg; + zero_offset_next = zero_offset_reg; + last_cycle_offset_next = last_cycle_offset_reg; + length_next = length_reg; + op_count_next = op_count_reg; + tr_count_next = tr_count_reg; + input_cycle_count_next = input_cycle_count_reg; + output_cycle_count_next = output_cycle_count_reg; + input_active_next = input_active_reg; + first_cycle_next = first_cycle_reg; + input_last_cycle_next = input_last_cycle_reg; + output_last_cycle_next = output_last_cycle_reg; + last_transfer_next = last_transfer_reg; + + status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg; + + inc_active = 1'b0; + dec_active = 1'b0; + + tag_next = tag_reg; + axis_id_next = 
axis_id_reg; + axis_dest_next = axis_dest_reg; + axis_user_next = axis_user_reg; + + status_fifo_wr_len = length_reg; + status_fifo_wr_tag = tag_reg; + status_fifo_wr_id = axis_id_reg; + status_fifo_wr_dest = axis_dest_reg; + status_fifo_wr_user = axis_user_reg; + status_fifo_wr_last = 1'b0; + + if (m_axi_wr.bready && m_axi_wr.bvalid && (m_axi_wr.bresp == AXI_RESP_SLVERR || m_axi_wr.bresp == AXI_RESP_DECERR)) begin + bresp_next = m_axi_wr.bresp; + end else begin + bresp_next = bresp_reg; + end + + case (state_reg) + STATE_IDLE: begin + // idle state - load new descriptor to start operation + flush_save = 1'b1; + wr_desc_req_ready_next = enable && active_count_av_reg; + + if (UNALIGNED_EN) begin + addr_next = wr_desc_req.req_dst_addr; + offset_next = OFFSET_W'(wr_desc_req.req_dst_addr & OFFSET_MASK); + strb_offset_mask_next = {AXI_STRB_W{1'b1}} << OFFSET_W'(wr_desc_req.req_dst_addr & OFFSET_MASK); + zero_offset_next = OFFSET_W'(wr_desc_req.req_dst_addr & OFFSET_MASK) == 0; + last_cycle_offset_next = offset_next + OFFSET_W'(wr_desc_req.req_len & OFFSET_MASK); + end else begin + addr_next = wr_desc_req.req_dst_addr & ADDR_MASK; + offset_next = '0; + strb_offset_mask_next = '1; + zero_offset_next = 1'b1; + last_cycle_offset_next = offset_next + OFFSET_W'(wr_desc_req.req_len & OFFSET_MASK); + end + tag_next = wr_desc_req.req_tag; + op_count_next = wr_desc_req.req_len; + first_cycle_next = 1'b1; + length_next = 0; + + if (wr_desc_req.req_ready && wr_desc_req.req_valid) begin + wr_desc_req_ready_next = 1'b0; + state_next = STATE_START; + end else begin + state_next = STATE_IDLE; + end + end + STATE_START: begin + // start state - initiate new AXI transfer + if (op_count_reg <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin + // packet smaller than max burst size + if ((12'(addr_reg & 12'hfff) + 12'(op_count_reg & 12'hfff)) >> 12 != 0 || op_count_reg >> 12 != 0) begin + // crosses 4k boundary + tr_count_next = 13'h1000 - 
12'(addr_reg & 12'hfff); + end else begin + // does not cross 4k boundary + tr_count_next = 13'(op_count_reg); + end + end else begin + // packet larger than max burst size + if ((12'(addr_reg & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin + // crosses 4k boundary + tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff); + end else begin + // does not cross 4k boundary + tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(addr_reg & OFFSET_MASK); + end + end + + input_cycle_count_next = CYCLE_COUNT_W'((tr_count_next - 13'd1) >> $clog2(AXIS_KEEP_W_INT)); + input_last_cycle_next = input_cycle_count_next == 0; + if (UNALIGNED_EN) begin + output_cycle_count_next = CYCLE_COUNT_W'((tr_count_next + 13'(addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE); + end else begin + output_cycle_count_next = CYCLE_COUNT_W'((tr_count_next - 13'd1) >> AXI_BURST_SIZE); + end + output_last_cycle_next = output_cycle_count_next == 0; + last_transfer_next = LEN_W'(tr_count_next) == op_count_reg; + input_active_next = 1'b1; + + if (UNALIGNED_EN) begin + if (!first_cycle_reg && last_transfer_next) begin + if (offset_reg >= last_cycle_offset_reg && last_cycle_offset_reg > 0) begin + // last cycle will be served by stored partial cycle + input_active_next = input_cycle_count_next > 0; + input_cycle_count_next = input_cycle_count_next - 1; + end + end + end + + if (!m_axi_awvalid_reg && active_count_av_reg) begin + m_axi_awaddr_next = addr_reg; + m_axi_awlen_next = 8'(output_cycle_count_next); + m_axi_awvalid_next = s_axis_wr_data.tvalid || !first_cycle_reg; + + if (m_axi_awvalid_next) begin + addr_next = addr_reg + AXI_ADDR_W'(tr_count_next); + op_count_next = op_count_reg - LEN_W'(tr_count_next); + + s_axis_wr_data_tready_next = m_axi_wready_int && input_active_next; + + inc_active = 1'b1; + + state_next = STATE_WRITE; + end else begin + state_next = STATE_START; + end + end else begin + state_next = STATE_START; + end + end + STATE_WRITE: begin + s_axis_wr_data_tready_next = m_axi_wready_int 
&& (last_transfer_reg || input_active_reg) && shift_axis_input_tready; + + if ((s_axis_wr_data.tready && shift_axis_tvalid) || (!input_active_reg && !last_transfer_reg) || !shift_axis_input_tready) begin + if (s_axis_wr_data.tready && s_axis_wr_data.tvalid) begin + transfer_in_save = 1'b1; + + axis_id_next = s_axis_wr_data.tid; + axis_dest_next = s_axis_wr_data.tdest; + axis_user_next = s_axis_wr_data.tuser; + end + + // update counters + if (first_cycle_reg) begin + length_next = length_reg + LEN_W'(AXIS_KEEP_W_INT - offset_reg); + end else begin + length_next = length_reg + LEN_W'(AXIS_KEEP_W_INT); + end + if (input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + end + input_last_cycle_next = input_cycle_count_next == 0; + output_cycle_count_next = output_cycle_count_reg - 1; + output_last_cycle_next = output_cycle_count_next == 0; + first_cycle_next = 1'b0; + strb_offset_mask_next = '1; + + m_axi_wdata_int = shift_axis_tdata; + m_axi_wstrb_int = strb_offset_mask_reg; + m_axi_wvalid_int = 1'b1; + + if (AXIS_LAST_EN && s_axis_wr_data.tlast) begin + // end of input frame + input_active_next = 1'b0; + s_axis_wr_data_tready_next = 1'b0; + end + + if (AXIS_LAST_EN && shift_axis_tlast) begin + // end of data packet + + cycle_size = (OFFSET_W+1)'(AXIS_KEEP_W_INT); + if (AXIS_KEEP_EN) begin + for (integer i = AXIS_KEEP_W_INT-1; i >= 0; i = i - 1) begin + if ((~shift_axis_tkeep & strb_offset_mask_reg & (1 << i)) != 0) begin + cycle_size = (OFFSET_W+1)'(i); + end + end + end + + if (output_last_cycle_reg) begin + m_axi_wlast_int = 1'b1; + + // no more data to transfer, finish operation + if (last_transfer_reg && last_cycle_offset_reg > 0) begin + if (AXIS_KEEP_EN && (shift_axis_tkeep & ~({AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg))) == 0) begin + m_axi_wstrb_int = strb_offset_mask_reg & shift_axis_tkeep; + if (first_cycle_reg) begin + length_next = length_reg + 
LEN_W'(cycle_size - offset_reg); + end else begin + length_next = length_reg + LEN_W'(cycle_size); + end + end else begin + m_axi_wstrb_int = strb_offset_mask_reg & {AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg); + if (first_cycle_reg) begin + length_next = length_reg + LEN_W'(last_cycle_offset_reg - offset_reg); + end else begin + length_next = length_reg + LEN_W'(last_cycle_offset_reg); + end + end + end else begin + if (AXIS_KEEP_EN) begin + m_axi_wstrb_int = strb_offset_mask_reg & shift_axis_tkeep; + if (first_cycle_reg) begin + length_next = length_reg + LEN_W'(cycle_size - offset_reg); + end else begin + length_next = length_reg + LEN_W'(cycle_size); + end + end + end + + // enqueue status FIFO entry for write completion + status_fifo_we = 1'b1; + status_fifo_wr_len = length_next; + status_fifo_wr_tag = tag_reg; + status_fifo_wr_id = axis_id_next; + status_fifo_wr_dest = axis_dest_next; + status_fifo_wr_user = axis_user_next; + status_fifo_wr_last = 1'b1; + + s_axis_wr_data_tready_next = 1'b0; + wr_desc_req_ready_next = enable && active_count_av_reg; + state_next = STATE_IDLE; + end else begin + // more cycles left in burst, finish burst + if (AXIS_KEEP_EN) begin + m_axi_wstrb_int = strb_offset_mask_reg & shift_axis_tkeep; + if (first_cycle_reg) begin + length_next = length_reg + LEN_W'(cycle_size - offset_reg); + end else begin + length_next = length_reg + LEN_W'(cycle_size); + end + end + + // enqueue status FIFO entry for write completion + status_fifo_we = 1'b1; + status_fifo_wr_len = length_next; + status_fifo_wr_tag = tag_reg; + status_fifo_wr_id = axis_id_next; + status_fifo_wr_dest = axis_dest_next; + status_fifo_wr_user = axis_user_next; + status_fifo_wr_last = 1'b1; + + s_axis_wr_data_tready_next = 1'b0; + state_next = STATE_FINISH_BURST; + end + + end else if (output_last_cycle_reg) begin + m_axi_wlast_int = 1'b1; + + if (op_count_reg > 0) begin + // current AXI transfer complete, but there is more data to transfer + // 
enqueue status FIFO entry for write completion + status_fifo_we = 1'b1; + status_fifo_wr_len = length_next; + status_fifo_wr_tag = tag_reg; + status_fifo_wr_id = axis_id_next; + status_fifo_wr_dest = axis_dest_next; + status_fifo_wr_user = axis_user_next; + status_fifo_wr_last = 1'b0; + + s_axis_wr_data_tready_next = 1'b0; + state_next = STATE_START; + end else begin + // no more data to transfer, finish operation + if (last_cycle_offset_reg > 0) begin + m_axi_wstrb_int = strb_offset_mask_reg & {AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg); + if (first_cycle_reg) begin + length_next = length_reg + LEN_W'(last_cycle_offset_reg - offset_reg); + end else begin + length_next = length_reg + LEN_W'(last_cycle_offset_reg); + end + end + + // enqueue status FIFO entry for write completion + status_fifo_we = 1'b1; + status_fifo_wr_len = length_next; + status_fifo_wr_tag = tag_reg; + status_fifo_wr_id = axis_id_next; + status_fifo_wr_dest = axis_dest_next; + status_fifo_wr_user = axis_user_next; + status_fifo_wr_last = 1'b1; + + if (AXIS_LAST_EN) begin + // not at the end of packet; drop remainder + s_axis_wr_data_tready_next = shift_axis_input_tready; + state_next = STATE_DROP_DATA; + end else begin + // no framing; return to idle + s_axis_wr_data_tready_next = 1'b0; + wr_desc_req_ready_next = enable && active_count_av_reg; + state_next = STATE_IDLE; + end + end + end else begin + s_axis_wr_data_tready_next = m_axi_wready_int && (last_transfer_reg || input_active_next) && shift_axis_input_tready; + state_next = STATE_WRITE; + end + end else begin + state_next = STATE_WRITE; + end + end + STATE_FINISH_BURST: begin + // finish current AXI burst + + if (m_axi_wready_int) begin + // update counters + if (input_active_reg) begin + input_cycle_count_next = input_cycle_count_reg - 1; + input_active_next = input_cycle_count_reg > 0; + end + input_last_cycle_next = input_cycle_count_next == 0; + output_cycle_count_next = output_cycle_count_reg - 1; + 
output_last_cycle_next = output_cycle_count_next == 0; + + m_axi_wdata_int = '0; + m_axi_wstrb_int = '0; + m_axi_wvalid_int = 1'b1; + + if (output_last_cycle_reg) begin + // no more data to transfer, finish operation + m_axi_wlast_int = 1'b1; + + s_axis_wr_data_tready_next = 1'b0; + wr_desc_req_ready_next = enable && active_count_av_reg; + state_next = STATE_IDLE; + end else begin + // more cycles in AXI transfer + state_next = STATE_FINISH_BURST; + end + end else begin + state_next = STATE_FINISH_BURST; + end + end + STATE_DROP_DATA: begin + // drop excess AXI stream data + s_axis_wr_data_tready_next = shift_axis_input_tready; + + if (shift_axis_tvalid) begin + if (s_axis_wr_data.tready && s_axis_wr_data.tvalid) begin + transfer_in_save = 1'b1; + end + + if (shift_axis_tlast) begin + s_axis_wr_data_tready_next = 1'b0; + wr_desc_req_ready_next = enable && active_count_av_reg; + state_next = STATE_IDLE; + end else begin + state_next = STATE_DROP_DATA; + end + end else begin + state_next = STATE_DROP_DATA; + end + end + default: begin + // invalid state + state_next = STATE_IDLE; + end + endcase + + if (status_fifo_rd_ptr_reg != status_fifo_wr_ptr_reg) begin + // status FIFO not empty + if (m_axi_wr.bready && m_axi_wr.bvalid) begin + // got write completion (BVALID/BREADY handshake on m_axi_wr), pop and return status + wr_desc_sts_len_next = status_fifo_len[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + wr_desc_sts_tag_next = status_fifo_tag[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + wr_desc_sts_id_next = status_fifo_id[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + wr_desc_sts_dest_next = status_fifo_dest[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + wr_desc_sts_user_next = status_fifo_user[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + if (bresp_next == AXI_RESP_SLVERR) begin + wr_desc_sts_error_next = DMA_ERROR_AXI_WR_SLVERR; + end else if (bresp_next == AXI_RESP_DECERR) begin + wr_desc_sts_error_next = DMA_ERROR_AXI_WR_DECERR; + end else begin + wr_desc_sts_error_next = DMA_ERROR_NONE; + end +
wr_desc_sts_valid_next = status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg + 1; + m_axi_bready_next = 1'b0; + + if (status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]) begin + bresp_next = AXI_RESP_OKAY; + end + + dec_active = 1'b1; + end else begin + // wait for write completion + m_axi_bready_next = 1'b1; + end + end +end + +always_ff @(posedge clk) begin + state_reg <= state_next; + + wr_desc_req_ready_reg <= wr_desc_req_ready_next; + + wr_desc_sts_len_reg <= wr_desc_sts_len_next; + wr_desc_sts_tag_reg <= wr_desc_sts_tag_next; + wr_desc_sts_id_reg <= wr_desc_sts_id_next; + wr_desc_sts_dest_reg <= wr_desc_sts_dest_next; + wr_desc_sts_user_reg <= wr_desc_sts_user_next; + wr_desc_sts_error_reg <= wr_desc_sts_error_next; + wr_desc_sts_valid_reg <= wr_desc_sts_valid_next; + + s_axis_wr_data_tready_reg <= s_axis_wr_data_tready_next; + + m_axi_awaddr_reg <= m_axi_awaddr_next; + m_axi_awlen_reg <= m_axi_awlen_next; + m_axi_awvalid_reg <= m_axi_awvalid_next; + m_axi_bready_reg <= m_axi_bready_next; + + addr_reg <= addr_next; + offset_reg <= offset_next; + strb_offset_mask_reg <= strb_offset_mask_next; + zero_offset_reg <= zero_offset_next; + last_cycle_offset_reg <= last_cycle_offset_next; + length_reg <= length_next; + op_count_reg <= op_count_next; + tr_count_reg <= tr_count_next; + input_cycle_count_reg <= input_cycle_count_next; + output_cycle_count_reg <= output_cycle_count_next; + input_active_reg <= input_active_next; + first_cycle_reg <= first_cycle_next; + input_last_cycle_reg <= input_last_cycle_next; + output_last_cycle_reg <= output_last_cycle_next; + last_transfer_reg <= last_transfer_next; + bresp_reg <= bresp_next; + + tag_reg <= tag_next; + axis_id_reg <= axis_id_next; + axis_dest_reg <= axis_dest_next; + axis_user_reg <= axis_user_next; + + // datapath + if (flush_save) begin + save_axis_tkeep_reg <= '0; + save_axis_tlast_reg <= 1'b0; + shift_axis_extra_cycle_reg <= 1'b0; + end else 
if (transfer_in_save) begin + save_axis_tdata_reg <= s_axis_wr_data.tdata; + save_axis_tkeep_reg <= AXIS_KEEP_EN ? s_axis_wr_data.tkeep : '1; + save_axis_tlast_reg <= s_axis_wr_data.tlast; + shift_axis_extra_cycle_reg <= s_axis_wr_data.tlast & ((s_axis_wr_data.tkeep >> ((OFFSET_W+1)'(AXIS_KEEP_W_INT) - offset_reg)) != 0); + end + + if (status_fifo_we) begin + status_fifo_len[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_len; + status_fifo_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_tag; + status_fifo_id[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_id; + status_fifo_dest[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_dest; + status_fifo_user[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_user; + status_fifo_last[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_last; + status_fifo_wr_ptr_reg <= status_fifo_wr_ptr_reg + 1; + end + status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next; + + if (active_count_reg < 2**STATUS_FIFO_AW && inc_active && !dec_active) begin + active_count_reg <= active_count_reg + 1; + active_count_av_reg <= active_count_reg < (2**STATUS_FIFO_AW-1); + end else if (active_count_reg > 0 && !inc_active && dec_active) begin + active_count_reg <= active_count_reg - 1; + active_count_av_reg <= 1'b1; + end else begin + active_count_av_reg <= active_count_reg < 2**STATUS_FIFO_AW; + end + + if (rst) begin + state_reg <= STATE_IDLE; + + wr_desc_req_ready_reg <= 1'b0; + wr_desc_sts_valid_reg <= 1'b0; + + s_axis_wr_data_tready_reg <= 1'b0; + + m_axi_awvalid_reg <= 1'b0; + m_axi_bready_reg <= 1'b0; + + bresp_reg <= AXI_RESP_OKAY; + + save_axis_tlast_reg <= 1'b0; + shift_axis_extra_cycle_reg <= 1'b0; + + status_fifo_wr_ptr_reg <= 0; + status_fifo_rd_ptr_reg <= 0; + + active_count_reg <= 0; + active_count_av_reg <= 1'b1; + end +end + +// output datapath logic +logic [AXI_DATA_W-1:0] m_axi_wdata_reg = '0; +logic [AXI_STRB_W-1:0] m_axi_wstrb_reg = '0; +logic 
m_axi_wlast_reg = 1'b0; +logic m_axi_wvalid_reg = 1'b0; + +logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = '0; +logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = '0; +logic out_fifo_half_full_reg = 1'b0; + +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXI_DATA_W-1:0] out_fifo_wdata[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXI_STRB_W-1:0] out_fifo_wstrb[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic out_fifo_wlast[2**OUTPUT_FIFO_AW]; + +assign m_axi_wready_int = !out_fifo_half_full_reg; + +assign m_axi_wr.wdata = m_axi_wdata_reg; +assign m_axi_wr.wstrb = m_axi_wstrb_reg; +assign m_axi_wr.wvalid = m_axi_wvalid_reg; +assign m_axi_wr.wlast = m_axi_wlast_reg; + +always_ff @(posedge clk) begin + m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wr.wready; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_AW-1); + + if (!out_fifo_full && m_axi_wvalid_int) begin + out_fifo_wdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wdata_int; + out_fifo_wstrb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wstrb_int; + out_fifo_wlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wlast_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!m_axi_wvalid_reg || m_axi_wr.wready)) begin + m_axi_wdata_reg <= out_fifo_wdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axi_wstrb_reg <= out_fifo_wstrb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axi_wlast_reg <= out_fifo_wlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axi_wvalid_reg <= 1'b1; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + + if (rst) begin + out_fifo_wr_ptr_reg <= '0; + out_fifo_rd_ptr_reg <= '0; + m_axi_wvalid_reg <= 1'b0; 
+ end +end + +endmodule + +`resetall diff --git a/src/dma/tb/taxi_axi_dma/Makefile b/src/dma/tb/taxi_axi_dma/Makefile new file mode 100644 index 0000000..4b86f3c --- /dev/null +++ b/src/dma/tb/taxi_axi_dma/Makefile @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: CERN-OHL-S-2.0 +# +# Copyright (c) 2020-2025 FPGA Ninja, LLC +# +# Authors: +# - Alex Forencich + +TOPLEVEL_LANG = verilog + +SIM ?= verilator +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +RTL_DIR = ../../rtl +LIB_DIR = ../../lib +TAXI_SRC_DIR = $(LIB_DIR)/taxi/src + +DUT = taxi_axi_dma +COCOTB_TEST_MODULES = test_$(DUT) +COCOTB_TOPLEVEL = test_$(DUT) +MODULE = $(COCOTB_TEST_MODULES) +TOPLEVEL = $(COCOTB_TOPLEVEL) +VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv +VERILOG_SOURCES += $(RTL_DIR)/$(DUT).f + +# handle file list files +process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1))) +process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f)) +uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1)) +VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES))) + +# module parameters +export PARAM_AXI_DATA_W := 32 +export PARAM_AXI_ADDR_W := 16 +export PARAM_AXI_STRB_W := $(shell expr $(PARAM_AXI_DATA_W) / 8 ) +export PARAM_AXI_ID_W := 8 +export PARAM_AXI_MAX_BURST_LEN := 16 +export PARAM_AXIS_DATA_W := $(PARAM_AXI_DATA_W) +export PARAM_AXIS_KEEP_EN := $(shell expr $(PARAM_AXIS_DATA_W) \> 8 ) +export PARAM_AXIS_KEEP_W := $(shell expr $(PARAM_AXIS_DATA_W) / 8 ) +export PARAM_AXIS_LAST_EN := 1 +export PARAM_AXIS_ID_EN := 1 +export PARAM_AXIS_ID_W := 8 +export PARAM_AXIS_DEST_EN := 0 +export PARAM_AXIS_DEST_W := 8 +export PARAM_AXIS_USER_EN := 1 +export PARAM_AXIS_USER_W := 1 +export PARAM_LEN_W := 20 +export PARAM_TAG_W := 8 +export PARAM_UNALIGNED_EN := 1 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter 
PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) +else ifeq ($(SIM), verilator) + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + VERILATOR_TRACE = 1 + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim diff --git a/src/dma/tb/taxi_axi_dma/test_taxi_axi_dma.py b/src/dma/tb/taxi_axi_dma/test_taxi_axi_dma.py new file mode 100644 index 0000000..03c0c40 --- /dev/null +++ b/src/dma/tb/taxi_axi_dma/test_taxi_axi_dma.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: CERN-OHL-S-2.0 +""" + +Copyright (c) 2020-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +""" + +import itertools +import logging +import os + +import cocotb_test.simulator +import pytest + +import cocotb + +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge +from cocotb.regression import TestFactory + +from cocotbext.axi import AxiBus, AxiRam +from cocotbext.axi import AxiStreamBus, AxiStreamFrame, AxiStreamSource, AxiStreamSink +from cocotbext.axi.stream import define_stream + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["req_src_addr", "req_dst_addr", "req_len", "req_tag", "req_valid", "req_ready"], + optional_signals=["req_id", "req_dest", "req_user"] +) + +DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus", + signals=["sts_tag", "sts_error", "sts_valid"], + optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"] +) + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk, 10, units="ns").start()) + + # read interface + self.read_desc_source = DescSource(DescBus.from_entity(dut.rd_desc), dut.clk, dut.rst) + self.read_desc_status_sink = 
DescStatusSink(DescStatusBus.from_entity(dut.rd_desc), dut.clk, dut.rst) + self.read_data_sink = AxiStreamSink(AxiStreamBus.from_entity(dut.m_axis_rd_data), dut.clk, dut.rst) + + # write interface + self.write_desc_source = DescSource(DescBus.from_entity(dut.wr_desc), dut.clk, dut.rst) + self.write_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.wr_desc), dut.clk, dut.rst) + self.write_data_source = AxiStreamSource(AxiStreamBus.from_entity(dut.s_axis_wr_data), dut.clk, dut.rst) + + # AXI interface + self.axi_ram = AxiRam(AxiBus.from_entity(dut.m_axi), dut.clk, dut.rst, size=2**16) + + dut.read_enable.setimmediatevalue(0) + dut.write_enable.setimmediatevalue(0) + dut.write_abort.setimmediatevalue(0) + + def set_idle_generator(self, generator=None): + if generator: + self.write_desc_source.set_pause_generator(generator()) + self.write_data_source.set_pause_generator(generator()) + self.read_desc_source.set_pause_generator(generator()) + self.axi_ram.write_if.b_channel.set_pause_generator(generator()) + self.axi_ram.read_if.r_channel.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + self.read_data_sink.set_pause_generator(generator()) + self.axi_ram.write_if.aw_channel.set_pause_generator(generator()) + self.axi_ram.write_if.w_channel.set_pause_generator(generator()) + self.axi_ram.read_if.ar_channel.set_pause_generator(generator()) + + async def cycle_reset(self): + self.dut.rst.setimmediatevalue(0) + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 1 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 0 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + +async def run_test_write(dut, data_in=None, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + byte_lanes = tb.axi_ram.write_if.byte_lanes + step_size = 1 if int(dut.UNALIGNED_EN.value) else byte_lanes + tag_count = 
2**len(tb.write_desc_source.bus.req_tag) + + cur_tag = 1 + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + dut.write_enable.value = 1 + + for length in list(range(1, byte_lanes*4+1))+[128]: + for offset in list(range(0, byte_lanes*2, step_size))+list(range(4096-byte_lanes*2, 4096, step_size)): + for diff in [-8, -2, -1, 0, 1, 2, 8]: + if length+diff < 1: + continue + + tb.log.info("length %d, offset %d, diff %d", length, offset, diff) + addr = offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + test_data2 = bytearray([x % 256 for x in range(length+diff)]) + + tb.axi_ram.write(addr-128, b'\xaa'*(len(test_data)+256)) + + desc = DescTransaction(req_dst_addr=addr, req_len=len(test_data), req_tag=cur_tag) + await tb.write_desc_source.send(desc) + + await tb.write_data_source.send(AxiStreamFrame(test_data2, tid=cur_tag)) + + status = await tb.write_desc_status_sink.recv() + + tb.log.info("status: %s", status) + assert int(status.sts_len) == min(len(test_data), len(test_data2)) + assert int(status.sts_tag) == cur_tag + assert int(status.sts_id) == cur_tag + assert int(status.sts_error) == 0 + + tb.log.debug("%s", tb.axi_ram.hexdump_str((addr & ~0xf)-16, (((addr & 0xf)+length-1) & ~0xf)+48)) + + if len(test_data) <= len(test_data2): + assert tb.axi_ram.read(addr-8, len(test_data)+16) == b'\xaa'*8+test_data+b'\xaa'*8 + else: + assert tb.axi_ram.read(addr-8, len(test_data2)+16) == b'\xaa'*8+test_data2+b'\xaa'*8 + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_read(dut, data_in=None, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + byte_lanes = tb.axi_ram.read_if.byte_lanes + step_size = 1 if int(dut.UNALIGNED_EN.value) else byte_lanes + tag_count = 2**len(tb.read_desc_source.bus.req_tag) + + cur_tag = 1 + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + 
tb.set_backpressure_generator(backpressure_inserter) + + dut.read_enable.value = 1 + + for length in list(range(1, byte_lanes*4+1))+[128]: + for offset in list(range(0, byte_lanes*2, step_size))+list(range(4096-byte_lanes*2, 4096, step_size)): + tb.log.info("length %d, offset %d", length, offset) + addr = offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axi_ram.write(addr-128, b'\xaa'*(len(test_data)+256)) + tb.axi_ram.write(addr, test_data) + + tb.log.debug("%s", tb.axi_ram.hexdump_str((addr & ~0xf)-16, (((addr & 0xf)+length-1) & ~0xf)+48)) + + desc = DescTransaction(req_src_addr=addr, req_len=len(test_data), req_tag=cur_tag, req_id=cur_tag) + await tb.read_desc_source.send(desc) + + status = await tb.read_desc_status_sink.recv() + + read_data = await tb.read_data_sink.recv() + + tb.log.info("status: %s", status) + tb.log.info("read_data: %s", read_data) + + assert int(status.sts_tag) == cur_tag + assert int(status.sts_error) == 0 + assert read_data.tdata == test_data + assert read_data.tid == cur_tag + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +if getattr(cocotb, 'top', None) is not None: + + for test in [run_test_write, run_test_read]: + + factory = TestFactory(test) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) +lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib')) +taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src')) + + +def process_f_files(files): + lst = {} + for f in files: + if f[-2:].lower() == '.f': + with open(f, 'r') as fp: + l = fp.read().split() + for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]): + 
lst[os.path.basename(f)] = f + else: + lst[os.path.basename(f)] = f + return list(lst.values()) + + +@pytest.mark.parametrize("axi_data_w", [8, 16, 32]) +@pytest.mark.parametrize("unaligned", [0, 1]) +def test_taxi_axi_dma(request, axi_data_w, unaligned): + dut = "taxi_axi_dma" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = module + + verilog_sources = [ + os.path.join(tests_dir, f"{toplevel}.sv"), + os.path.join(rtl_dir, f"{dut}.f"), + ] + + verilog_sources = process_f_files(verilog_sources) + + parameters = {} + + axis_data_w = axi_data_w + + parameters['AXI_DATA_W'] = axi_data_w + parameters['AXI_ADDR_W'] = 16 + parameters['AXI_STRB_W'] = parameters['AXI_DATA_W'] // 8 + parameters['AXI_ID_W'] = 8 + parameters['AXI_MAX_BURST_LEN'] = 16 + parameters['AXIS_DATA_W'] = axis_data_w + parameters['AXIS_KEEP_EN'] = int(parameters['AXIS_DATA_W'] > 8) + parameters['AXIS_KEEP_W'] = parameters['AXIS_DATA_W'] // 8 + parameters['AXIS_LAST_EN'] = 1 + parameters['AXIS_ID_EN'] = 1 + parameters['AXIS_ID_W'] = 8 + parameters['AXIS_DEST_EN'] = 0 + parameters['AXIS_DEST_W'] = 8 + parameters['AXIS_USER_EN'] = 1 + parameters['AXIS_USER_W'] = 1 + parameters['LEN_W'] = 20 + parameters['TAG_W'] = 8 + parameters['UNALIGNED_EN'] = unaligned + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + simulator="verilator", + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + ) diff --git a/src/dma/tb/taxi_axi_dma/test_taxi_axi_dma.sv b/src/dma/tb/taxi_axi_dma/test_taxi_axi_dma.sv new file mode 100644 index 0000000..948f151 --- /dev/null +++ b/src/dma/tb/taxi_axi_dma/test_taxi_axi_dma.sv @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + 
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI4 DMA testbench: port-less wrapper that instantiates taxi_axi_dma (uut) and the interface instances wired to it
+ */
+module test_taxi_axi_dma #
+(
+    /* verilator lint_off WIDTHTRUNC */
+    parameter AXI_DATA_W = 32,
+    parameter AXI_ADDR_W = 16,
+    parameter AXI_STRB_W = AXI_DATA_W / 8,  // derived from AXI_DATA_W by default
+    parameter AXI_ID_W = 8,
+    parameter AXI_MAX_BURST_LEN = 16,
+    parameter AXIS_DATA_W = AXI_DATA_W,  // stream width defaults to the AXI width
+    parameter logic AXIS_KEEP_EN = AXIS_DATA_W > 8,  // default: set only when the stream is wider than 8 bits
+    parameter AXIS_KEEP_W = AXIS_DATA_W / 8,  // derived from AXIS_DATA_W by default
+    parameter logic AXIS_LAST_EN = 1'b1,
+    parameter logic AXIS_ID_EN = 1'b1,
+    parameter AXIS_ID_W = 8,
+    parameter logic AXIS_DEST_EN = 1'b1,
+    parameter AXIS_DEST_W = 8,
+    parameter logic AXIS_USER_EN = 1'b1,
+    parameter AXIS_USER_W = 8,
+    parameter LEN_W = 20,
+    parameter TAG_W = 8,
+    parameter logic UNALIGNED_EN = 1'b1
+    /* verilator lint_on WIDTHTRUNC */
+)
+();  // empty port list
+
+logic clk;  // not driven anywhere in this module; presumably driven by the test environment -- confirm
+logic rst;  // not driven anywhere in this module; presumably driven by the test environment -- confirm
+
+taxi_dma_desc_if #(
+    .SRC_ADDR_W(AXI_ADDR_W),  // source and destination addresses both use the AXI address width
+    .SRC_SEL_EN(1'b0),
+    .SRC_ASID_EN(1'b0),
+    .DST_ADDR_W(AXI_ADDR_W),
+    .DST_SEL_EN(1'b0),
+    .DST_ASID_EN(1'b0),
+    .IMM_EN(1'b0),
+    .LEN_W(LEN_W),
+    .TAG_W(TAG_W),
+    .ID_EN(AXIS_ID_EN),  // ID/DEST/USER settings are taken from the AXIS_* parameters
+    .ID_W(AXIS_ID_W),
+    .DEST_EN(AXIS_DEST_EN),
+    .DEST_W(AXIS_DEST_W),
+    .USER_EN(AXIS_USER_EN),
+    .USER_W(AXIS_USER_W)
+) rd_desc(), wr_desc();  // two instances with identical parameters, one per transfer direction
+
+taxi_axis_if #(
+    .DATA_W(AXIS_DATA_W),
+    .KEEP_EN(AXIS_KEEP_EN),
+    .KEEP_W(AXIS_KEEP_W),
+    .LAST_EN(AXIS_LAST_EN),
+    .ID_EN(AXIS_ID_EN),
+    .ID_W(AXIS_ID_W),
+    .DEST_EN(AXIS_DEST_EN),
+    .DEST_W(AXIS_DEST_W),
+    .USER_EN(AXIS_USER_EN),
+    .USER_W(AXIS_USER_W)
+) s_axis_wr_data(), m_axis_rd_data();  // write-data input and read-data output streams share one parameter set
+
+taxi_axi_if #(
+    .DATA_W(AXI_DATA_W),
+    .ADDR_W(AXI_ADDR_W),
+    .STRB_W(AXI_STRB_W),
+    .ID_W(AXI_ID_W),
+    .AWUSER_EN(1'b0),  // all five *USER_EN options are tied off to 0
+    .WUSER_EN(1'b0),
+    .BUSER_EN(1'b0),
+    .ARUSER_EN(1'b0),
+    .RUSER_EN(1'b0),
+    .MAX_BURST_LEN(AXI_MAX_BURST_LEN)
+) m_axi();  // single AXI interface instance; connected to both m_axi_wr and m_axi_rd of uut below
+
+logic read_enable;  // connected to uut.read_enable; not driven in this module
+logic write_enable;  // connected to uut.write_enable; not driven in this module
+logic write_abort;  // connected to uut.write_abort; not driven in this module
+
+taxi_axi_dma #(
+    .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN),
+    .UNALIGNED_EN(UNALIGNED_EN)
+)
+uut (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * DMA read descriptor
+     */
+    .rd_desc_req(rd_desc),  // request and status ports are both bound to the same rd_desc instance
+    .rd_desc_sts(rd_desc),
+
+    /*
+     * DMA write descriptor
+     */
+    .wr_desc_req(wr_desc),  // request and status ports are both bound to the same wr_desc instance
+    .wr_desc_sts(wr_desc),
+
+    /*
+     * AXI stream read data output
+     */
+    .m_axis_rd_data(m_axis_rd_data),
+
+    /*
+     * AXI stream write data input
+     */
+    .s_axis_wr_data(s_axis_wr_data),
+
+    /*
+     * AXI4 master interface
+     */
+    .m_axi_wr(m_axi),  // write and read master ports are both bound to the single m_axi instance
+    .m_axi_rd(m_axi),
+
+    /*
+     * Configuration
+     */
+    .read_enable(read_enable),
+    .write_enable(write_enable),
+    .write_abort(write_abort)
+);
+
+endmodule
+
+`resetall