dma: Add AXI streaming DMA module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-11-03 17:14:24 -08:00
parent 9442bb7fbb
commit 5b0c83fc57
8 changed files with 2115 additions and 0 deletions

View File

@@ -64,6 +64,7 @@ To facilitate the dual-license model, contributions to the project can only be a
* SV interface for segmented RAM
* SV interface for DMA descriptors
* AXI central DMA
* AXI streaming DMA
* Segmented SDP RAM
* Segmented dual-clock SDP RAM
* Ethernet

View File

@@ -0,0 +1,6 @@
taxi_axi_dma.sv
taxi_axi_dma_wr.sv
taxi_axi_dma_rd.sv
taxi_dma_desc_if.sv
../lib/taxi/src/axis/rtl/taxi_axis_if.sv
../lib/taxi/src/axi/rtl/taxi_axi_if.sv

128
src/dma/rtl/taxi_axi_dma.sv Normal file
View File

@@ -0,0 +1,128 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2018-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4 DMA
*/
module taxi_axi_dma #
(
    // Maximum AXI burst length to generate
    parameter AXI_MAX_BURST_LEN = 16,
    // Enable support for unaligned transfers
    parameter logic UNALIGNED_EN = 1'b1
)
(
    input  wire logic         clk,
    input  wire logic         rst,

    /*
     * DMA read descriptor
     */
    taxi_dma_desc_if.req_snk  rd_desc_req,
    taxi_dma_desc_if.sts_src  rd_desc_sts,

    /*
     * DMA write descriptor
     */
    taxi_dma_desc_if.req_snk  wr_desc_req,
    taxi_dma_desc_if.sts_src  wr_desc_sts,

    /*
     * AXI stream read data output
     */
    taxi_axis_if.src          m_axis_rd_data,

    /*
     * AXI stream write data input
     */
    taxi_axis_if.snk          s_axis_wr_data,

    /*
     * AXI4 master interface
     */
    taxi_axi_if.wr_mst        m_axi_wr,
    taxi_axi_if.rd_mst        m_axi_rd,

    /*
     * Configuration
     */
    input  wire logic         read_enable,
    input  wire logic         write_enable,
    input  wire logic         write_abort
);

// Structural wrapper only: pairs one write engine with one read engine.
// Both engines receive the same clock, reset and parameters; every other
// port of this module is routed to exactly one of the two engines.

// Write direction: s_axis_wr_data -> m_axi_wr, driven by wr_desc_req and
// reporting on wr_desc_sts.
taxi_axi_dma_wr #(
    .AXI_MAX_BURST_LEN (AXI_MAX_BURST_LEN),
    .UNALIGNED_EN      (UNALIGNED_EN)
)
axi_dma_wr_inst (
    // ports whose names match the local signal use implicit connections
    .clk,
    .rst,
    .wr_desc_req,
    .wr_desc_sts,
    .s_axis_wr_data,
    .m_axi_wr,
    // control inputs are prefixed with the direction at this level
    .enable (write_enable),
    .abort  (write_abort)
);

// Read direction: m_axi_rd -> m_axis_rd_data, driven by rd_desc_req and
// reporting on rd_desc_sts.
taxi_axi_dma_rd #(
    .AXI_MAX_BURST_LEN (AXI_MAX_BURST_LEN),
    .UNALIGNED_EN      (UNALIGNED_EN)
)
axi_dma_rd_inst (
    .clk,
    .rst,
    .rd_desc_req,
    .rd_desc_sts,
    .m_axis_rd_data,
    .m_axi_rd,
    .enable (read_enable)
);

endmodule
`resetall

View File

@@ -0,0 +1,617 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2018-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * AXI4 DMA read engine (AXI4 memory-mapped read to AXI stream)
 */
module taxi_axi_dma_rd #
(
    // Maximum AXI burst length to generate
    parameter AXI_MAX_BURST_LEN = 16,
    // Enable support for unaligned transfers
    parameter logic UNALIGNED_EN = 1'b1
)
(
    input  wire logic         clk,
    input  wire logic         rst,

    /*
     * DMA read descriptor
     */
    taxi_dma_desc_if.req_snk  rd_desc_req,
    taxi_dma_desc_if.sts_src  rd_desc_sts,

    /*
     * AXI stream read data output
     */
    taxi_axis_if.src          m_axis_rd_data,

    /*
     * AXI4 master interface
     */
    taxi_axi_if.rd_mst        m_axi_rd,

    /*
     * Configuration
     */
    input  wire logic         enable
);

// Overview
//
// A descriptor accepted on rd_desc_req (source address, length, tag and
// stream sideband) is handled by two cooperating state machines:
//
//  - the AXI state machine splits the request into AXI read bursts that
//    respect the maximum burst size and never cross a 4 KB boundary, and
//    issues them on the AR channel;
//  - the AXIS state machine consumes the R channel, shifts the data so that
//    the first requested byte lands in lane 0, and writes the resulting
//    stream into a small output FIFO that drives m_axis_rd_data.
//
// The two machines are coupled by a single-entry command register
// (axis_cmd_*), handshaked with axis_cmd_valid_reg / axis_cmd_ready.
// One status word per descriptor is produced on rd_desc_sts when the last
// output cycle has been generated.

// extract parameters
localparam AXI_DATA_W = m_axi_rd.DATA_W;
localparam AXI_ADDR_W = m_axi_rd.ADDR_W;
localparam AXI_STRB_W = m_axi_rd.STRB_W;
localparam AXI_ID_W = m_axi_rd.ID_W;
// burst length limit: the smaller of the module parameter and the limit
// advertised by the AXI interface
localparam AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_rd.MAX_BURST_LEN ? AXI_MAX_BURST_LEN : m_axi_rd.MAX_BURST_LEN;

localparam LEN_W = rd_desc_req.LEN_W;
localparam TAG_W = rd_desc_req.TAG_W;

localparam AXIS_DATA_W = m_axis_rd_data.DATA_W;
localparam AXIS_KEEP_EN = m_axis_rd_data.KEEP_EN;
localparam AXIS_KEEP_W = m_axis_rd_data.KEEP_W;
localparam AXIS_LAST_EN = m_axis_rd_data.LAST_EN;
localparam AXIS_ID_EN = m_axis_rd_data.ID_EN;
localparam AXIS_ID_W = m_axis_rd_data.ID_W;
localparam AXIS_DEST_EN = m_axis_rd_data.DEST_EN;
localparam AXIS_DEST_W = m_axis_rd_data.DEST_W;
localparam AXIS_USER_EN = m_axis_rd_data.USER_EN;
localparam AXIS_USER_W = m_axis_rd_data.USER_W;

localparam AXI_BYTE_LANES = AXI_STRB_W;
localparam AXI_BYTE_SIZE = AXI_DATA_W/AXI_BYTE_LANES;
localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W);
// Largest burst in bytes. Built from the clamped burst length so that the
// generated bursts also honor the limit advertised by the AXI interface
// (previously the clamped value was computed but never used).
localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN_INT << AXI_BURST_SIZE;

localparam AXIS_KEEP_W_INT = AXIS_KEEP_EN ? AXIS_KEEP_W : 1;
localparam AXIS_BYTE_LANES = AXIS_KEEP_W_INT;
localparam AXIS_BYTE_SIZE = AXIS_DATA_W/AXIS_BYTE_LANES;

localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1;
localparam OFFSET_MASK = AXI_STRB_W > 1 ? {OFFSET_W{1'b1}} : 0;
localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W);
// The input/output cycle counters hold the number of bus cycles of the
// WHOLE descriptor (req_len >> AXI_BURST_SIZE), not of a single burst, so
// their width has to follow LEN_W. A fixed 13-bit based width truncated the
// count for long transfers and terminated the stream early.
localparam CYCLE_COUNT_W = LEN_W - AXI_BURST_SIZE + 1;

localparam OUTPUT_FIFO_AW = 5;

// check configuration
if (AXI_BYTE_SIZE * AXI_STRB_W != AXI_DATA_W)
    $fatal(0, "Error: AXI data width not evenly divisible (instance %m)");

if (AXIS_BYTE_SIZE * AXIS_KEEP_W_INT != AXIS_DATA_W)
    $fatal(0, "Error: AXI stream data width not evenly divisible (instance %m)");

if (AXI_BYTE_SIZE != AXIS_BYTE_SIZE)
    $fatal(0, "Error: word size mismatch (instance %m)");

if (2**$clog2(AXI_BYTE_LANES) != AXI_BYTE_LANES)
    $fatal(0, "Error: AXI word width must be even power of two (instance %m)");

if (AXI_DATA_W != AXIS_DATA_W)
    $fatal(0, "Error: AXI interface width must match AXI stream interface width (instance %m)");

if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256)
    $fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)");

if (rd_desc_req.SRC_ADDR_W < AXI_ADDR_W)
    $fatal(0, "Error: Descriptor address width is not sufficient (instance %m)");

// AXI response codes
localparam logic [1:0]
    AXI_RESP_OKAY = 2'b00,
    AXI_RESP_EXOKAY = 2'b01,
    AXI_RESP_SLVERR = 2'b10,
    AXI_RESP_DECERR = 2'b11;

// error codes reported on rd_desc_sts.sts_error
// (this module only generates NONE, AXI_RD_SLVERR and AXI_RD_DECERR)
localparam logic [3:0]
    DMA_ERROR_NONE = 4'd0,
    DMA_ERROR_TIMEOUT = 4'd1,
    DMA_ERROR_PARITY = 4'd2,
    DMA_ERROR_AXI_RD_SLVERR = 4'd4,
    DMA_ERROR_AXI_RD_DECERR = 4'd5,
    DMA_ERROR_AXI_WR_SLVERR = 4'd6,
    DMA_ERROR_AXI_WR_DECERR = 4'd7,
    DMA_ERROR_PCIE_FLR = 4'd8,
    DMA_ERROR_PCIE_CPL_POISONED = 4'd9,
    DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10,
    DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11;

// AXI (address channel) state machine
localparam logic [0:0]
    AXI_STATE_IDLE = 1'd0,
    AXI_STATE_START = 1'd1;

logic [0:0] axi_state_reg = AXI_STATE_IDLE, axi_state_next;

// AXIS (data path) state machine
localparam logic [0:0]
    AXIS_STATE_IDLE = 1'd0,
    AXIS_STATE_READ = 1'd1;

logic [0:0] axis_state_reg = AXIS_STATE_IDLE, axis_state_next;

// datapath control signals
logic transfer_in_save;
logic axis_cmd_ready;

// AXI state machine working registers
logic [AXI_ADDR_W-1:0] addr_reg = '0, addr_next;        // address of the next burst
logic [LEN_W-1:0] op_count_reg = '0, op_count_next;     // bytes not yet requested
logic [12:0] tr_count_reg = '0, tr_count_next;          // bytes covered by the current burst

// command register handed from the AXI state machine to the AXIS state machine
logic [OFFSET_W-1:0] axis_cmd_offset_reg = '0, axis_cmd_offset_next;
logic [OFFSET_W-1:0] axis_cmd_last_cycle_offset_reg = '0, axis_cmd_last_cycle_offset_next;
logic [CYCLE_COUNT_W-1:0] axis_cmd_input_cycle_count_reg = '0, axis_cmd_input_cycle_count_next;
logic [CYCLE_COUNT_W-1:0] axis_cmd_output_cycle_count_reg = '0, axis_cmd_output_cycle_count_next;
logic axis_cmd_bubble_cycle_reg = 1'b0, axis_cmd_bubble_cycle_next;
logic [TAG_W-1:0] axis_cmd_tag_reg = '0, axis_cmd_tag_next;
logic [AXIS_ID_W-1:0] axis_cmd_axis_id_reg = '0, axis_cmd_axis_id_next;
logic [AXIS_DEST_W-1:0] axis_cmd_axis_dest_reg = '0, axis_cmd_axis_dest_next;
logic [AXIS_USER_W-1:0] axis_cmd_axis_user_reg = '0, axis_cmd_axis_user_next;
logic axis_cmd_valid_reg = 1'b0, axis_cmd_valid_next;

// AXIS state machine working registers (copy of the command being executed)
logic [OFFSET_W-1:0] offset_reg = '0, offset_next;
logic [OFFSET_W-1:0] last_cycle_offset_reg = '0, last_cycle_offset_next;
logic [CYCLE_COUNT_W-1:0] input_cycle_count_reg = '0, input_cycle_count_next;
logic [CYCLE_COUNT_W-1:0] output_cycle_count_reg = '0, output_cycle_count_next;
logic input_active_reg = 1'b0, input_active_next;
logic output_active_reg = 1'b0, output_active_next;
logic bubble_cycle_reg = 1'b0, bubble_cycle_next;
logic first_cycle_reg = 1'b0, first_cycle_next;
logic output_last_cycle_reg = 1'b0, output_last_cycle_next;
logic [1:0] rresp_reg = AXI_RESP_OKAY, rresp_next;     // sticky error response of the current descriptor

logic [TAG_W-1:0] tag_reg = '0, tag_next;
logic [AXIS_ID_W-1:0] axis_id_reg = '0, axis_id_next;
logic [AXIS_DEST_W-1:0] axis_dest_reg = '0, axis_dest_next;
logic [AXIS_USER_W-1:0] axis_user_reg = '0, axis_user_next;

// descriptor interface registers
logic rd_desc_req_ready_reg = 1'b0, rd_desc_req_ready_next;

logic [TAG_W-1:0] rd_desc_sts_tag_reg = '0, rd_desc_sts_tag_next;
logic [3:0] rd_desc_sts_error_reg = 4'd0, rd_desc_sts_error_next;
logic rd_desc_sts_valid_reg = 1'b0, rd_desc_sts_valid_next;

// AXI read channel registers
logic [AXI_ADDR_W-1:0] m_axi_araddr_reg = '0, m_axi_araddr_next;
logic [7:0] m_axi_arlen_reg = 8'd0, m_axi_arlen_next;
logic m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next;
logic m_axi_rready_reg = 1'b0, m_axi_rready_next;

// Realignment: the previous R beat is kept in save_axi_rdata_reg; a full
// output word is cut out of {current beat, previous beat} at a byte offset
// derived from offset_reg.
logic [AXI_DATA_W-1:0] save_axi_rdata_reg = '0;

wire [AXI_DATA_W*2-1:0] axi_rdata_full = {m_axi_rd.rdata, save_axi_rdata_reg};
wire [AXI_DATA_W-1:0] shift_axi_rdata = axi_rdata_full[(OFFSET_W+1)'(AXI_STRB_W-offset_reg)*AXI_BYTE_SIZE +: AXI_DATA_W];

// internal datapath
logic [AXIS_DATA_W-1:0] m_axis_rd_data_tdata_int;
logic [AXIS_KEEP_W-1:0] m_axis_rd_data_tkeep_int;
logic                   m_axis_rd_data_tvalid_int;
wire                    m_axis_rd_data_tready_int;
logic                   m_axis_rd_data_tlast_int;
logic [AXIS_ID_W-1:0]   m_axis_rd_data_tid_int;
logic [AXIS_DEST_W-1:0] m_axis_rd_data_tdest_int;
logic [AXIS_USER_W-1:0] m_axis_rd_data_tuser_int;

assign rd_desc_req.req_ready = rd_desc_req_ready_reg;

// only tag, error and valid carry information in the read status
assign rd_desc_sts.sts_len = '0;
assign rd_desc_sts.sts_tag = rd_desc_sts_tag_reg;
assign rd_desc_sts.sts_id = '0;
assign rd_desc_sts.sts_dest = '0;
assign rd_desc_sts.sts_user = '0;
assign rd_desc_sts.sts_error = rd_desc_sts_error_reg;
assign rd_desc_sts.sts_valid = rd_desc_sts_valid_reg;

assign m_axi_rd.arid = '0;
assign m_axi_rd.araddr = m_axi_araddr_reg;
assign m_axi_rd.arlen = m_axi_arlen_reg;
assign m_axi_rd.arsize = 3'(AXI_BURST_SIZE);   // always full-width beats
assign m_axi_rd.arburst = 2'b01;               // INCR burst
assign m_axi_rd.arlock = 1'b0;
assign m_axi_rd.arcache = 4'b0011;
assign m_axi_rd.arprot = 3'b010;
assign m_axi_rd.arvalid = m_axi_arvalid_reg;
assign m_axi_rd.rready = m_axi_rready_reg;

// AXI state machine: accept descriptors and generate read address bursts
always_comb begin
    axi_state_next = AXI_STATE_IDLE;

    rd_desc_req_ready_next = 1'b0;

    m_axi_araddr_next = m_axi_araddr_reg;
    m_axi_arlen_next = m_axi_arlen_reg;
    // hold arvalid until the address has been accepted
    m_axi_arvalid_next = m_axi_arvalid_reg && !m_axi_rd.arready;

    addr_next = addr_reg;
    op_count_next = op_count_reg;
    tr_count_next = tr_count_reg;

    axis_cmd_offset_next = axis_cmd_offset_reg;
    axis_cmd_last_cycle_offset_next = axis_cmd_last_cycle_offset_reg;
    axis_cmd_input_cycle_count_next = axis_cmd_input_cycle_count_reg;
    axis_cmd_output_cycle_count_next = axis_cmd_output_cycle_count_reg;
    axis_cmd_bubble_cycle_next = axis_cmd_bubble_cycle_reg;
    axis_cmd_tag_next = axis_cmd_tag_reg;
    axis_cmd_axis_id_next = axis_cmd_axis_id_reg;
    axis_cmd_axis_dest_next = axis_cmd_axis_dest_reg;
    axis_cmd_axis_user_next = axis_cmd_axis_user_reg;
    // command stays valid until the AXIS state machine takes it
    axis_cmd_valid_next = axis_cmd_valid_reg && !axis_cmd_ready;

    case (axi_state_reg)
        AXI_STATE_IDLE: begin
            // idle state - load new descriptor to start operation
            // (only when the command register is free and the engine is enabled)
            rd_desc_req_ready_next = !axis_cmd_valid_reg && enable;

            if (rd_desc_req.req_ready && rd_desc_req.req_valid) begin
                if (UNALIGNED_EN) begin
                    addr_next = rd_desc_req.req_src_addr;
                    // shift amount for the realignment mux; zero when the
                    // source address is already bus-aligned
                    axis_cmd_offset_next = AXI_STRB_W > 1 ? OFFSET_W'(AXI_STRB_W) - OFFSET_W'(rd_desc_req.req_src_addr & OFFSET_MASK) : '0;
                    // an unaligned start needs one input beat before the
                    // first output word can be produced
                    axis_cmd_bubble_cycle_next = axis_cmd_offset_next > 0;
                    axis_cmd_last_cycle_offset_next = OFFSET_W'(rd_desc_req.req_len & OFFSET_MASK);
                end else begin
                    // unaligned support disabled: force the address onto a bus word
                    addr_next = rd_desc_req.req_src_addr & ADDR_MASK;
                    axis_cmd_offset_next = '0;
                    axis_cmd_bubble_cycle_next = 1'b0;
                    axis_cmd_last_cycle_offset_next = OFFSET_W'(rd_desc_req.req_len & OFFSET_MASK);
                end
                axis_cmd_tag_next = rd_desc_req.req_tag;
                op_count_next = rd_desc_req.req_len;

                axis_cmd_axis_id_next = rd_desc_req.req_id;
                axis_cmd_axis_dest_next = rd_desc_req.req_dest;
                axis_cmd_axis_user_next = rd_desc_req.req_user;

                // number of R beats (minus one) for the whole descriptor
                if (UNALIGNED_EN) begin
                    axis_cmd_input_cycle_count_next = CYCLE_COUNT_W'((op_count_next + LEN_W'(rd_desc_req.req_src_addr & OFFSET_MASK) - LEN_W'(1)) >> AXI_BURST_SIZE);
                end else begin
                    axis_cmd_input_cycle_count_next = CYCLE_COUNT_W'((op_count_next - LEN_W'(1)) >> AXI_BURST_SIZE);
                end
                // number of output stream cycles (minus one) for the whole descriptor
                axis_cmd_output_cycle_count_next = CYCLE_COUNT_W'((op_count_next - LEN_W'(1)) >> AXI_BURST_SIZE);

                axis_cmd_valid_next = 1'b1;

                rd_desc_req_ready_next = 1'b0;
                axi_state_next = AXI_STATE_START;
            end else begin
                axi_state_next = AXI_STATE_IDLE;
            end
        end
        AXI_STATE_START: begin
            // start state - initiate new AXI transfer
            if (!m_axi_rd.arvalid) begin
                if (op_count_reg <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin
                    // packet smaller than max burst size
                    if ((12'(addr_reg & 12'hfff) + 12'(op_count_reg & 12'hfff)) >> 12 != 0 || op_count_reg >> 12 != 0) begin
                        // crosses 4k boundary
                        tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff);
                    end else begin
                        // does not cross 4k boundary
                        tr_count_next = 13'(op_count_reg);
                    end
                end else begin
                    // packet larger than max burst size
                    if ((12'(addr_reg & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin
                        // crosses 4k boundary
                        tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff);
                    end else begin
                        // does not cross 4k boundary
                        tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(addr_reg & OFFSET_MASK);
                    end
                end

                m_axi_araddr_next = addr_reg;
                // arlen is the beat count minus one; an unaligned start
                // address adds its byte offset to the span being fetched
                if (UNALIGNED_EN) begin
                    m_axi_arlen_next = 8'((tr_count_next + 13'(addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE);
                end else begin
                    m_axi_arlen_next = 8'((tr_count_next - 13'd1) >> AXI_BURST_SIZE);
                end
                m_axi_arvalid_next = 1'b1;

                addr_next = addr_reg + AXI_ADDR_W'(tr_count_next);
                op_count_next = op_count_reg - LEN_W'(tr_count_next);

                if (op_count_next > 0) begin
                    // more bursts needed for this descriptor
                    axi_state_next = AXI_STATE_START;
                end else begin
                    rd_desc_req_ready_next = !axis_cmd_valid_reg && enable;
                    axi_state_next = AXI_STATE_IDLE;
                end
            end else begin
                // previous address not yet accepted
                axi_state_next = AXI_STATE_START;
            end
        end
    endcase
end

// AXIS state machine: receive read data, realign it and generate the stream
always_comb begin
    axis_state_next = AXIS_STATE_IDLE;

    rd_desc_sts_tag_next = rd_desc_sts_tag_reg;
    rd_desc_sts_error_next = rd_desc_sts_error_reg;
    rd_desc_sts_valid_next = 1'b0;

    m_axis_rd_data_tdata_int = shift_axi_rdata;
    m_axis_rd_data_tkeep_int = '1;
    m_axis_rd_data_tlast_int = 1'b0;
    m_axis_rd_data_tvalid_int = 1'b0;
    m_axis_rd_data_tid_int = axis_id_reg;
    m_axis_rd_data_tdest_int = axis_dest_reg;
    m_axis_rd_data_tuser_int = axis_user_reg;

    m_axi_rready_next = 1'b0;

    transfer_in_save = 1'b0;
    axis_cmd_ready = 1'b0;

    offset_next = offset_reg;
    last_cycle_offset_next = last_cycle_offset_reg;
    input_cycle_count_next = input_cycle_count_reg;
    output_cycle_count_next = output_cycle_count_reg;
    input_active_next = input_active_reg;
    output_active_next = output_active_reg;
    bubble_cycle_next = bubble_cycle_reg;
    first_cycle_next = first_cycle_reg;
    output_last_cycle_next = output_last_cycle_reg;

    tag_next = tag_reg;
    axis_id_next = axis_id_reg;
    axis_dest_next = axis_dest_reg;
    axis_user_next = axis_user_reg;

    // remember an error response seen on any beat of the descriptor
    if (m_axi_rd.rready && m_axi_rd.rvalid && (m_axi_rd.rresp == AXI_RESP_SLVERR || m_axi_rd.rresp == AXI_RESP_DECERR)) begin
        rresp_next = m_axi_rd.rresp;
    end else begin
        rresp_next = rresp_reg;
    end

    case (axis_state_reg)
        AXIS_STATE_IDLE: begin
            // idle state - load new descriptor to start operation
            m_axi_rready_next = 1'b0;

            // store transfer parameters
            // (loaded every idle cycle; only meaningful once the command is valid)
            if (UNALIGNED_EN) begin
                offset_next = axis_cmd_offset_reg;
            end else begin
                offset_next = 0;
            end
            last_cycle_offset_next = axis_cmd_last_cycle_offset_reg;
            input_cycle_count_next = axis_cmd_input_cycle_count_reg;
            output_cycle_count_next = axis_cmd_output_cycle_count_reg;
            bubble_cycle_next = axis_cmd_bubble_cycle_reg;
            tag_next = axis_cmd_tag_reg;
            axis_id_next = axis_cmd_axis_id_reg;
            axis_dest_next = axis_cmd_axis_dest_reg;
            axis_user_next = axis_cmd_axis_user_reg;

            output_last_cycle_next = output_cycle_count_next == 0;
            input_active_next = 1'b1;
            output_active_next = 1'b1;
            first_cycle_next = 1'b1;

            if (axis_cmd_valid_reg) begin
                axis_cmd_ready = 1'b1;
                m_axi_rready_next = m_axis_rd_data_tready_int;
                axis_state_next = AXIS_STATE_READ;
            end
        end
        AXIS_STATE_READ: begin
            // handle AXI read data
            m_axi_rready_next = m_axis_rd_data_tready_int && input_active_reg;

            // advance on every accepted R beat, or freely once all input
            // beats have been received and only output remains
            if ((m_axi_rd.rready && m_axi_rd.rvalid) || !input_active_reg) begin
                // transfer in AXI read data
                transfer_in_save = m_axi_rd.rready && m_axi_rd.rvalid;

                if (UNALIGNED_EN && first_cycle_reg && bubble_cycle_reg) begin
                    // bubble cycle: capture the first beat without
                    // producing an output word
                    if (input_active_reg) begin
                        input_cycle_count_next = input_cycle_count_reg - 1;
                        input_active_next = input_cycle_count_reg > 0;
                    end
                    bubble_cycle_next = 1'b0;
                    first_cycle_next = 1'b0;

                    m_axi_rready_next = m_axis_rd_data_tready_int && input_active_next;
                    axis_state_next = AXIS_STATE_READ;
                end else begin
                    // update counters
                    if (input_active_reg) begin
                        input_cycle_count_next = input_cycle_count_reg - 1;
                        input_active_next = input_cycle_count_reg > 0;
                    end
                    if (output_active_reg) begin
                        output_cycle_count_next = output_cycle_count_reg - 1;
                        output_active_next = output_cycle_count_reg > 0;
                    end
                    output_last_cycle_next = output_cycle_count_next == 0;
                    bubble_cycle_next = 1'b0;
                    first_cycle_next = 1'b0;

                    // pass through read data
                    m_axis_rd_data_tdata_int = shift_axi_rdata;
                    m_axis_rd_data_tkeep_int = '1;
                    m_axis_rd_data_tvalid_int = 1'b1;

                    if (output_last_cycle_reg) begin
                        // no more data to transfer, finish operation
                        if (last_cycle_offset_reg > 0) begin
                            // partial last word: keep only the low lanes
                            m_axis_rd_data_tkeep_int = {AXIS_KEEP_W_INT{1'b1}} >> ((OFFSET_W+1)'(AXIS_KEEP_W_INT) - last_cycle_offset_reg);
                        end
                        m_axis_rd_data_tlast_int = 1'b1;

                        // report completion, including any error response
                        // seen during the descriptor (this beat included)
                        rd_desc_sts_tag_next = tag_reg;
                        if (rresp_next == AXI_RESP_SLVERR) begin
                            rd_desc_sts_error_next = DMA_ERROR_AXI_RD_SLVERR;
                        end else if (rresp_next == AXI_RESP_DECERR) begin
                            rd_desc_sts_error_next = DMA_ERROR_AXI_RD_DECERR;
                        end else begin
                            rd_desc_sts_error_next = DMA_ERROR_NONE;
                        end
                        rd_desc_sts_valid_next = 1'b1;

                        // clear the sticky response for the next descriptor
                        rresp_next = AXI_RESP_OKAY;

                        m_axi_rready_next = 1'b0;
                        axis_state_next = AXIS_STATE_IDLE;
                    end else begin
                        // more cycles in AXI transfer
                        m_axi_rready_next = m_axis_rd_data_tready_int && input_active_next;
                        axis_state_next = AXIS_STATE_READ;
                    end
                end
            end else begin
                axis_state_next = AXIS_STATE_READ;
            end
        end
    endcase
end

always_ff @(posedge clk) begin
    axi_state_reg <= axi_state_next;
    axis_state_reg <= axis_state_next;

    rd_desc_req_ready_reg <= rd_desc_req_ready_next;

    rd_desc_sts_tag_reg <= rd_desc_sts_tag_next;
    rd_desc_sts_error_reg <= rd_desc_sts_error_next;
    rd_desc_sts_valid_reg <= rd_desc_sts_valid_next;

    m_axi_araddr_reg <= m_axi_araddr_next;
    m_axi_arlen_reg <= m_axi_arlen_next;
    m_axi_arvalid_reg <= m_axi_arvalid_next;
    m_axi_rready_reg <= m_axi_rready_next;

    addr_reg <= addr_next;
    op_count_reg <= op_count_next;
    tr_count_reg <= tr_count_next;

    axis_cmd_offset_reg <= axis_cmd_offset_next;
    axis_cmd_last_cycle_offset_reg <= axis_cmd_last_cycle_offset_next;
    axis_cmd_input_cycle_count_reg <= axis_cmd_input_cycle_count_next;
    axis_cmd_output_cycle_count_reg <= axis_cmd_output_cycle_count_next;
    axis_cmd_bubble_cycle_reg <= axis_cmd_bubble_cycle_next;
    axis_cmd_tag_reg <= axis_cmd_tag_next;
    axis_cmd_axis_id_reg <= axis_cmd_axis_id_next;
    axis_cmd_axis_dest_reg <= axis_cmd_axis_dest_next;
    axis_cmd_axis_user_reg <= axis_cmd_axis_user_next;
    axis_cmd_valid_reg <= axis_cmd_valid_next;

    offset_reg <= offset_next;
    last_cycle_offset_reg <= last_cycle_offset_next;
    input_cycle_count_reg <= input_cycle_count_next;
    output_cycle_count_reg <= output_cycle_count_next;
    input_active_reg <= input_active_next;
    output_active_reg <= output_active_next;
    bubble_cycle_reg <= bubble_cycle_next;
    first_cycle_reg <= first_cycle_next;
    output_last_cycle_reg <= output_last_cycle_next;
    rresp_reg <= rresp_next;

    tag_reg <= tag_next;
    axis_id_reg <= axis_id_next;
    axis_dest_reg <= axis_dest_next;
    axis_user_reg <= axis_user_next;

    // keep the accepted beat for the realignment mux
    if (transfer_in_save) begin
        save_axi_rdata_reg <= m_axi_rd.rdata;
    end

    // synchronous reset of control state only; data registers are left as-is
    if (rst) begin
        axi_state_reg <= AXI_STATE_IDLE;
        axis_state_reg <= AXIS_STATE_IDLE;

        axis_cmd_valid_reg <= 1'b0;

        rd_desc_req_ready_reg <= 1'b0;

        rd_desc_sts_valid_reg <= 1'b0;
        m_axi_arvalid_reg <= 1'b0;
        m_axi_rready_reg <= 1'b0;

        rresp_reg <= AXI_RESP_OKAY;
    end
end

// output datapath logic
// A 2**OUTPUT_FIFO_AW entry FIFO followed by an output register. Upstream
// logic is throttled when the FIFO is half full (see tready_int below), so
// words produced while that flag propagates still find room.
logic [AXIS_DATA_W-1:0] m_axis_rd_data_tdata_reg  = '0;
logic [AXIS_KEEP_W-1:0] m_axis_rd_data_tkeep_reg  = '0;
logic                   m_axis_rd_data_tvalid_reg = 1'b0;
logic                   m_axis_rd_data_tlast_reg  = 1'b0;
logic [AXIS_ID_W-1:0]   m_axis_rd_data_tid_reg    = '0;
logic [AXIS_DEST_W-1:0] m_axis_rd_data_tdest_reg  = '0;
logic [AXIS_USER_W-1:0] m_axis_rd_data_tuser_reg  = '0;

// pointers carry one extra MSB so that full and empty can be told apart
logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = '0;
logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = '0;
logic out_fifo_half_full_reg = 1'b0;

wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}});
wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg;

(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXIS_DATA_W-1:0] out_fifo_tdata[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXIS_KEEP_W-1:0] out_fifo_tkeep[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic                   out_fifo_tlast[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXIS_ID_W-1:0]   out_fifo_tid[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXIS_DEST_W-1:0] out_fifo_tdest[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXIS_USER_W-1:0] out_fifo_tuser[2**OUTPUT_FIFO_AW];

assign m_axis_rd_data_tready_int = !out_fifo_half_full_reg;

// sideband signals that are disabled on the stream interface are tied off
assign m_axis_rd_data.tdata  = m_axis_rd_data_tdata_reg;
assign m_axis_rd_data.tkeep  = AXIS_KEEP_EN ? m_axis_rd_data_tkeep_reg : '1;
assign m_axis_rd_data.tstrb  = m_axis_rd_data.tkeep;
assign m_axis_rd_data.tvalid = m_axis_rd_data_tvalid_reg;
assign m_axis_rd_data.tlast  = AXIS_LAST_EN ? m_axis_rd_data_tlast_reg : 1'b1;
assign m_axis_rd_data.tid    = AXIS_ID_EN   ? m_axis_rd_data_tid_reg   : '0;
assign m_axis_rd_data.tdest  = AXIS_DEST_EN ? m_axis_rd_data_tdest_reg : '0;
assign m_axis_rd_data.tuser  = AXIS_USER_EN ? m_axis_rd_data_tuser_reg : '0;

always_ff @(posedge clk) begin
    // output word is consumed when the sink is ready
    m_axis_rd_data_tvalid_reg <= m_axis_rd_data_tvalid_reg && !m_axis_rd_data.tready;

    out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_AW-1);

    // FIFO write
    if (!out_fifo_full && m_axis_rd_data_tvalid_int) begin
        out_fifo_tdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tdata_int;
        out_fifo_tkeep[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tkeep_int;
        out_fifo_tlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tlast_int;
        out_fifo_tid[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tid_int;
        out_fifo_tdest[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tdest_int;
        out_fifo_tuser[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axis_rd_data_tuser_int;
        out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1;
    end

    // FIFO read into the output register when it is empty or being drained
    if (!out_fifo_empty && (!m_axis_rd_data_tvalid_reg || m_axis_rd_data.tready)) begin
        m_axis_rd_data_tdata_reg <= out_fifo_tdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axis_rd_data_tkeep_reg <= out_fifo_tkeep[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axis_rd_data_tvalid_reg <= 1'b1;
        m_axis_rd_data_tlast_reg <= out_fifo_tlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axis_rd_data_tid_reg <= out_fifo_tid[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axis_rd_data_tdest_reg <= out_fifo_tdest[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axis_rd_data_tuser_reg <= out_fifo_tuser[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1;
    end

    if (rst) begin
        out_fifo_wr_ptr_reg <= '0;
        out_fifo_rd_ptr_reg <= '0;
        m_axis_rd_data_tvalid_reg <= 1'b0;
    end
end

endmodule
`resetall

View File

@@ -0,0 +1,872 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2018-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * AXI4 DMA write engine (AXI stream to AXI4 memory-mapped write)
 */
module taxi_axi_dma_wr #
(
// Maximum AXI burst length to generate
parameter AXI_MAX_BURST_LEN = 16,
// Enable support for unaligned transfers
parameter logic UNALIGNED_EN = 1'b1
)
(
input wire logic clk,
input wire logic rst,
/*
* DMA write descriptor
*/
taxi_dma_desc_if.req_snk wr_desc_req,
taxi_dma_desc_if.sts_src wr_desc_sts,
/*
* AXI stream write data input
*/
taxi_axis_if.snk s_axis_wr_data,
/*
* AXI4 master interface
*/
taxi_axi_if.wr_mst m_axi_wr,
/*
* Configuration
*/
input wire logic enable,
input wire logic abort
);
// extract parameters
localparam AXI_DATA_W = m_axi_wr.DATA_W;
localparam AXI_ADDR_W = m_axi_wr.ADDR_W;
localparam AXI_STRB_W = m_axi_wr.STRB_W;
localparam AXI_ID_W = m_axi_wr.ID_W;
localparam AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_wr.MAX_BURST_LEN ? AXI_MAX_BURST_LEN : m_axi_wr.MAX_BURST_LEN;
localparam LEN_W = wr_desc_req.LEN_W;
localparam TAG_W = wr_desc_req.TAG_W;
localparam AXIS_DATA_W = s_axis_wr_data.DATA_W;
localparam AXIS_KEEP_EN = s_axis_wr_data.KEEP_EN;
localparam AXIS_KEEP_W = s_axis_wr_data.KEEP_W;
localparam AXIS_LAST_EN = s_axis_wr_data.LAST_EN;
localparam AXIS_ID_EN = s_axis_wr_data.ID_EN;
localparam AXIS_ID_W = s_axis_wr_data.ID_W;
localparam AXIS_DEST_EN = s_axis_wr_data.DEST_EN;
localparam AXIS_DEST_W = s_axis_wr_data.DEST_W;
localparam AXIS_USER_EN = s_axis_wr_data.USER_EN;
localparam AXIS_USER_W = s_axis_wr_data.USER_W;
localparam AXI_BYTE_LANES = AXI_STRB_W;
localparam AXI_BYTE_SIZE = AXI_DATA_W/AXI_BYTE_LANES;
localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W);
localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN << AXI_BURST_SIZE;
localparam AXIS_KEEP_W_INT = AXIS_KEEP_EN ? AXIS_KEEP_W : 1;
localparam AXIS_BYTE_LANES = AXIS_KEEP_W_INT;
localparam AXIS_BYTE_SIZE = AXIS_DATA_W/AXIS_BYTE_LANES;
localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1;
localparam OFFSET_MASK = AXI_STRB_W > 1 ? {OFFSET_W{1'b1}} : 0;
localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W);
localparam CYCLE_COUNT_W = 13 - AXI_BURST_SIZE;
localparam STATUS_FIFO_AW = 5;
localparam OUTPUT_FIFO_AW = 5;
// check configuration
if (AXI_BYTE_SIZE * AXI_STRB_W != AXI_DATA_W)
$fatal(0, "Error: AXI data width not evenly divisible (instance %m)");
if (AXIS_BYTE_SIZE * AXIS_KEEP_W_INT != AXIS_DATA_W)
$fatal(0, "Error: AXI stream data width not evenly divisible (instance %m)");
if (AXI_BYTE_SIZE != AXIS_BYTE_SIZE)
$fatal(0, "Error: word size mismatch (instance %m)");
if (2**$clog2(AXI_BYTE_LANES) != AXI_BYTE_LANES)
$fatal(0, "Error: AXI word width must be even power of two (instance %m)");
if (AXI_DATA_W != AXIS_DATA_W)
$fatal(0, "Error: AXI interface width must match AXI stream interface width (instance %m)");
if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256)
$fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)");
if (wr_desc_req.DST_ADDR_W < AXI_ADDR_W)
$fatal(0, "Error: Descriptor address width is not sufficient (instance %m)");
localparam logic [1:0]
AXI_RESP_OKAY = 2'b00,
AXI_RESP_EXOKAY = 2'b01,
AXI_RESP_SLVERR = 2'b10,
AXI_RESP_DECERR = 2'b11;
localparam logic [3:0]
DMA_ERROR_NONE = 4'd0,
DMA_ERROR_TIMEOUT = 4'd1,
DMA_ERROR_PARITY = 4'd2,
DMA_ERROR_AXI_RD_SLVERR = 4'd4,
DMA_ERROR_AXI_RD_DECERR = 4'd5,
DMA_ERROR_AXI_WR_SLVERR = 4'd6,
DMA_ERROR_AXI_WR_DECERR = 4'd7,
DMA_ERROR_PCIE_FLR = 4'd8,
DMA_ERROR_PCIE_CPL_POISONED = 4'd9,
DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10,
DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11;
localparam logic [2:0]
STATE_IDLE = 3'd0,
STATE_START = 3'd1,
STATE_WRITE = 3'd2,
STATE_FINISH_BURST = 3'd3,
STATE_DROP_DATA = 3'd4;
logic [2:0] state_reg = STATE_IDLE, state_next;
// datapath control signals
logic transfer_in_save;
logic flush_save;
logic status_fifo_we;
logic [OFFSET_W:0] cycle_size;
logic [AXI_ADDR_W-1:0] addr_reg = '0, addr_next;
logic [LEN_W-1:0] op_count_reg = '0, op_count_next;
logic [12:0] tr_count_reg = '0, tr_count_next;
logic [OFFSET_W-1:0] offset_reg = '0, offset_next;
logic [AXI_STRB_W-1:0] strb_offset_mask_reg = '1, strb_offset_mask_next;
logic zero_offset_reg = 1'b1, zero_offset_next;
logic [OFFSET_W-1:0] last_cycle_offset_reg = '0, last_cycle_offset_next;
logic [LEN_W-1:0] length_reg = '0, length_next;
logic [CYCLE_COUNT_W-1:0] input_cycle_count_reg = '0, input_cycle_count_next;
logic [CYCLE_COUNT_W-1:0] output_cycle_count_reg = '0, output_cycle_count_next;
logic input_active_reg = 1'b0, input_active_next;
logic first_cycle_reg = 1'b0, first_cycle_next;
logic input_last_cycle_reg = 1'b0, input_last_cycle_next;
logic output_last_cycle_reg = 1'b0, output_last_cycle_next;
logic last_transfer_reg = 1'b0, last_transfer_next;
logic [1:0] bresp_reg = AXI_RESP_OKAY, bresp_next;
logic [TAG_W-1:0] tag_reg = '0, tag_next;
logic [AXIS_ID_W-1:0] axis_id_reg = '0, axis_id_next;
logic [AXIS_DEST_W-1:0] axis_dest_reg = '0, axis_dest_next;
logic [AXIS_USER_W-1:0] axis_user_reg = '0, axis_user_next;
logic [STATUS_FIFO_AW+1-1:0] status_fifo_wr_ptr_reg = '0;
logic [STATUS_FIFO_AW+1-1:0] status_fifo_rd_ptr_reg = '0, status_fifo_rd_ptr_next;
logic [LEN_W-1:0] status_fifo_len[2**STATUS_FIFO_AW];
logic [TAG_W-1:0] status_fifo_tag[2**STATUS_FIFO_AW];
logic [AXIS_ID_W-1:0] status_fifo_id[2**STATUS_FIFO_AW];
logic [AXIS_DEST_W-1:0] status_fifo_dest[2**STATUS_FIFO_AW];
logic [AXIS_USER_W-1:0] status_fifo_user[2**STATUS_FIFO_AW];
logic status_fifo_last[2**STATUS_FIFO_AW];
logic [LEN_W-1:0] status_fifo_wr_len;
logic [TAG_W-1:0] status_fifo_wr_tag;
logic [AXIS_ID_W-1:0] status_fifo_wr_id;
logic [AXIS_DEST_W-1:0] status_fifo_wr_dest;
logic [AXIS_USER_W-1:0] status_fifo_wr_user;
logic status_fifo_wr_last;
logic [STATUS_FIFO_AW+1-1:0] active_count_reg = 0;
logic active_count_av_reg = 1'b1;
logic inc_active;
logic dec_active;
logic wr_desc_req_ready_reg = 1'b0, wr_desc_req_ready_next;
logic [LEN_W-1:0] wr_desc_sts_len_reg = '0, wr_desc_sts_len_next;
logic [TAG_W-1:0] wr_desc_sts_tag_reg = '0, wr_desc_sts_tag_next;
logic [AXIS_ID_W-1:0] wr_desc_sts_id_reg = '0, wr_desc_sts_id_next;
logic [AXIS_DEST_W-1:0] wr_desc_sts_dest_reg = '0, wr_desc_sts_dest_next;
logic [AXIS_USER_W-1:0] wr_desc_sts_user_reg = '0, wr_desc_sts_user_next;
logic [3:0] wr_desc_sts_error_reg = 4'd0, wr_desc_sts_error_next;
logic wr_desc_sts_valid_reg = 1'b0, wr_desc_sts_valid_next;
logic [AXI_ADDR_W-1:0] m_axi_awaddr_reg = '0, m_axi_awaddr_next;
logic [7:0] m_axi_awlen_reg = 8'd0, m_axi_awlen_next;
logic m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next;
logic m_axi_bready_reg = 1'b0, m_axi_bready_next;
logic s_axis_wr_data_tready_reg = 1'b0, s_axis_wr_data_tready_next;
logic [AXIS_DATA_W-1:0] save_axis_tdata_reg = '0;
logic [AXIS_KEEP_W_INT-1:0] save_axis_tkeep_reg = '0;
logic save_axis_tlast_reg = 1'b0;
logic [AXIS_DATA_W-1:0] shift_axis_tdata;
logic [AXIS_KEEP_W_INT-1:0] shift_axis_tkeep;
logic shift_axis_tvalid;
logic shift_axis_tlast;
logic shift_axis_input_tready;
logic shift_axis_extra_cycle_reg = 1'b0;
// internal datapath
logic [AXI_DATA_W-1:0] m_axi_wdata_int;
logic [AXI_STRB_W-1:0] m_axi_wstrb_int;
logic m_axi_wlast_int;
logic m_axi_wvalid_int;
wire m_axi_wready_int;
assign wr_desc_req.req_ready = wr_desc_req_ready_reg;
assign wr_desc_sts.sts_len = wr_desc_sts_len_reg;
assign wr_desc_sts.sts_tag = wr_desc_sts_tag_reg;
assign wr_desc_sts.sts_id = wr_desc_sts_id_reg;
assign wr_desc_sts.sts_dest = wr_desc_sts_dest_reg;
assign wr_desc_sts.sts_user = wr_desc_sts_user_reg;
assign wr_desc_sts.sts_error = wr_desc_sts_error_reg;
assign wr_desc_sts.sts_valid = wr_desc_sts_valid_reg;
assign s_axis_wr_data.tready = s_axis_wr_data_tready_reg;
assign m_axi_wr.awid = '0;
assign m_axi_wr.awaddr = m_axi_awaddr_reg;
assign m_axi_wr.awlen = m_axi_awlen_reg;
assign m_axi_wr.awsize = 3'(AXI_BURST_SIZE);
assign m_axi_wr.awburst = 2'b01;
assign m_axi_wr.awlock = 1'b0;
assign m_axi_wr.awcache = 4'b0011;
assign m_axi_wr.awprot = 3'b010;
assign m_axi_wr.awvalid = m_axi_awvalid_reg;
assign m_axi_wr.bready = m_axi_bready_reg;
// Input realignment ("shift") stage.
//
// Produces shift_axis_* from the incoming AXI stream beat and the saved
// previous beat so that stream bytes line up with the byte lanes selected by
// the destination address offset (offset_reg).
//
// The first branch is elaborated when unaligned transfers are disabled or the
// AXI bus is a single byte wide: the stream is passed through unchanged.
if (!UNALIGNED_EN || AXI_STRB_W == 1) begin : shift
always_comb begin
shift_axis_tdata = s_axis_wr_data.tdata;
shift_axis_tkeep = s_axis_wr_data.tkeep;
shift_axis_tvalid = s_axis_wr_data.tvalid;
shift_axis_tlast = AXIS_LAST_EN && s_axis_wr_data.tlast;
shift_axis_input_tready = 1'b1;
end
end else begin : shift
// {current beat, saved beat}: a window of AXIS_DATA_W bits (AXIS_KEEP_W keep
// bits) is selected from these starting at (AXIS_KEEP_W_INT - offset_reg)
// bytes, which places the tail of the saved beat in the low lanes
wire [AXIS_DATA_W*2-1:0] tdata_full = {s_axis_wr_data.tdata, save_axis_tdata_reg};
wire [AXIS_KEEP_W*2-1:0] tkeep_full = {s_axis_wr_data.tkeep, save_axis_tkeep_reg};
// same as tkeep_full but with the current beat's keep bits forced to zero,
// used when only the saved beat contributes data
wire [AXIS_KEEP_W*2-1:0] tkeep_mask = {{AXIS_KEEP_W_INT{1'b0}}, save_axis_tkeep_reg};
always_comb begin
if (zero_offset_reg) begin
// passthrough if no overlap
shift_axis_tdata = s_axis_wr_data.tdata;
shift_axis_tkeep = s_axis_wr_data.tkeep;
shift_axis_tvalid = s_axis_wr_data.tvalid;
shift_axis_tlast = AXIS_LAST_EN && s_axis_wr_data.tlast;
shift_axis_input_tready = 1'b1;
end else if (!AXIS_LAST_EN) begin
// no framing: always shift, never report tlast
shift_axis_tdata = tdata_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg)*AXIS_BYTE_SIZE +: AXIS_DATA_W];
shift_axis_tkeep = tkeep_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg) +: AXIS_KEEP_W];
shift_axis_tvalid = s_axis_wr_data.tvalid;
shift_axis_tlast = 1'b0;
shift_axis_input_tready = 1'b1;
end else if (shift_axis_extra_cycle_reg) begin
// extra cycle after the last input beat: emit the bytes left in the save
// register; output is valid regardless of the input, and the input is
// held off unless the save register is being flushed
shift_axis_tdata = tdata_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg)*AXIS_BYTE_SIZE +: AXIS_DATA_W];
shift_axis_tkeep = tkeep_mask[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg) +: AXIS_KEEP_W];
shift_axis_tvalid = 1'b1;
shift_axis_tlast = save_axis_tlast_reg;
shift_axis_input_tready = flush_save;
end else begin
// normal shifted operation
shift_axis_tdata = tdata_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg)*AXIS_BYTE_SIZE +: AXIS_DATA_W];
shift_axis_tkeep = tkeep_full[(OFFSET_W+1)'(AXIS_KEEP_W_INT-offset_reg) +: AXIS_KEEP_W];
shift_axis_tvalid = s_axis_wr_data.tvalid;
// tlast is forwarded in this cycle only if none of the input keep bits at
// or above (AXIS_KEEP_W_INT - offset_reg) are set, i.e. nothing spills
// into a following output cycle
shift_axis_tlast = (s_axis_wr_data.tlast && ((s_axis_wr_data.tkeep & ({AXIS_KEEP_W_INT{1'b1}} << ((OFFSET_W+1)'(AXIS_KEEP_W_INT)-offset_reg))) == 0));
// drop input ready in the cycle the last input beat is being accepted
shift_axis_input_tready = !(s_axis_wr_data.tlast && s_axis_wr_data.tready && s_axis_wr_data.tvalid);
end
end
end
// Write DMA control logic (combinational next-state and output decode).
//
// States:
//   STATE_IDLE         - clear the save register and latch a new descriptor
//   STATE_START        - size the next AXI burst (bounded by the maximum
//                        burst size and the 4 KB boundary) and issue AW
//   STATE_WRITE        - move realigned stream data into the W output FIFO
//   STATE_FINISH_BURST - input frame ended early; pad the rest of the burst
//                        with zero-strobe beats
//   STATE_DROP_DATA    - descriptor satisfied before tlast; discard the
//                        remainder of the input frame
//
// Every completed burst pushes one entry into the status FIFO; entries are
// popped when the matching AXI write response arrives, and a descriptor
// status is emitted for entries marked "last".
always_comb begin
    // defaults: hold registered values, deassert single-cycle strobes
    state_next = STATE_IDLE;

    wr_desc_req_ready_next = 1'b0;

    wr_desc_sts_len_next = wr_desc_sts_len_reg;
    wr_desc_sts_tag_next = wr_desc_sts_tag_reg;
    wr_desc_sts_id_next = wr_desc_sts_id_reg;
    wr_desc_sts_dest_next = wr_desc_sts_dest_reg;
    wr_desc_sts_user_next = wr_desc_sts_user_reg;
    wr_desc_sts_error_next = wr_desc_sts_error_reg;
    wr_desc_sts_valid_next = 1'b0;

    s_axis_wr_data_tready_next = 1'b0;

    m_axi_awaddr_next = m_axi_awaddr_reg;
    m_axi_awlen_next = m_axi_awlen_reg;
    // keep AW valid until it has been accepted
    m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_wr.awready;
    m_axi_wdata_int = shift_axis_tdata;
    m_axi_wstrb_int = shift_axis_tkeep;
    m_axi_wlast_int = 1'b0;
    m_axi_wvalid_int = 1'b0;
    m_axi_bready_next = 1'b0;

    transfer_in_save = 1'b0;
    flush_save = 1'b0;
    status_fifo_we = 1'b0;

    cycle_size = (OFFSET_W+1)'(AXIS_KEEP_W_INT);

    addr_next = addr_reg;
    offset_next = offset_reg;
    strb_offset_mask_next = strb_offset_mask_reg;
    zero_offset_next = zero_offset_reg;
    last_cycle_offset_next = last_cycle_offset_reg;
    length_next = length_reg;
    op_count_next = op_count_reg;
    tr_count_next = tr_count_reg;
    input_cycle_count_next = input_cycle_count_reg;
    output_cycle_count_next = output_cycle_count_reg;
    input_active_next = input_active_reg;
    first_cycle_next = first_cycle_reg;
    input_last_cycle_next = input_last_cycle_reg;
    output_last_cycle_next = output_last_cycle_reg;
    last_transfer_next = last_transfer_reg;

    status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg;

    inc_active = 1'b0;
    dec_active = 1'b0;

    tag_next = tag_reg;
    axis_id_next = axis_id_reg;
    axis_dest_next = axis_dest_reg;
    axis_user_next = axis_user_reg;

    status_fifo_wr_len = length_reg;
    status_fifo_wr_tag = tag_reg;
    status_fifo_wr_id = axis_id_reg;
    status_fifo_wr_dest = axis_dest_reg;
    status_fifo_wr_user = axis_user_reg;
    status_fifo_wr_last = 1'b0;

    // remember an error response until the operation's final status is sent
    if (m_axi_wr.bready && m_axi_wr.bvalid && (m_axi_wr.bresp == AXI_RESP_SLVERR || m_axi_wr.bresp == AXI_RESP_DECERR)) begin
        bresp_next = m_axi_wr.bresp;
    end else begin
        bresp_next = bresp_reg;
    end

    case (state_reg)
        STATE_IDLE: begin
            // idle state - load new descriptor to start operation
            flush_save = 1'b1;
            wr_desc_req_ready_next = enable && active_count_av_reg;

            if (UNALIGNED_EN) begin
                addr_next = wr_desc_req.req_dst_addr;
                offset_next = OFFSET_W'(wr_desc_req.req_dst_addr & OFFSET_MASK);
                // strobe mask for the first beat: lanes below the offset are disabled
                strb_offset_mask_next = {AXI_STRB_W{1'b1}} << OFFSET_W'(wr_desc_req.req_dst_addr & OFFSET_MASK);
                zero_offset_next = OFFSET_W'(wr_desc_req.req_dst_addr & OFFSET_MASK) == 0;
                last_cycle_offset_next = offset_next + OFFSET_W'(wr_desc_req.req_len & OFFSET_MASK);
            end else begin
                // unaligned support disabled: force the address onto a bus word boundary
                addr_next = wr_desc_req.req_dst_addr & ADDR_MASK;
                offset_next = '0;
                strb_offset_mask_next = '1;
                zero_offset_next = 1'b1;
                last_cycle_offset_next = offset_next + OFFSET_W'(wr_desc_req.req_len & OFFSET_MASK);
            end
            tag_next = wr_desc_req.req_tag;
            op_count_next = wr_desc_req.req_len;
            first_cycle_next = 1'b1;
            length_next = 0;

            if (wr_desc_req.req_ready && wr_desc_req.req_valid) begin
                wr_desc_req_ready_next = 1'b0;
                state_next = STATE_START;
            end else begin
                state_next = STATE_IDLE;
            end
        end
        STATE_START: begin
            // start state - initiate new AXI transfer
            if (op_count_reg <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin
                // packet smaller than max burst size
                if ((12'(addr_reg & 12'hfff) + 12'(op_count_reg & 12'hfff)) >> 12 != 0 || op_count_reg >> 12 != 0) begin
                    // crosses 4k boundary
                    tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff);
                end else begin
                    // does not cross 4k boundary
                    tr_count_next = 13'(op_count_reg);
                end
            end else begin
                // packet larger than max burst size
                if ((12'(addr_reg & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin
                    // crosses 4k boundary
                    tr_count_next = 13'h1000 - 12'(addr_reg & 12'hfff);
                end else begin
                    // does not cross 4k boundary
                    tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(addr_reg & OFFSET_MASK);
                end
            end

            // cycle counters hold (number of cycles - 1) for this burst
            input_cycle_count_next = CYCLE_COUNT_W'((tr_count_next - 13'd1) >> $clog2(AXIS_KEEP_W_INT));
            input_last_cycle_next = input_cycle_count_next == 0;
            if (UNALIGNED_EN) begin
                output_cycle_count_next = CYCLE_COUNT_W'((tr_count_next + 13'(addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE);
            end else begin
                output_cycle_count_next = CYCLE_COUNT_W'((tr_count_next - 13'd1) >> AXI_BURST_SIZE);
            end
            output_last_cycle_next = output_cycle_count_next == 0;
            last_transfer_next = LEN_W'(tr_count_next) == op_count_reg;
            input_active_next = 1'b1;

            if (UNALIGNED_EN) begin
                if (!first_cycle_reg && last_transfer_next) begin
                    if (offset_reg >= last_cycle_offset_reg && last_cycle_offset_reg > 0) begin
                        // last cycle will be served by stored partial cycle
                        input_active_next = input_cycle_count_next > 0;
                        input_cycle_count_next = input_cycle_count_next - 1;
                    end
                end
            end

            if (!m_axi_awvalid_reg && active_count_av_reg) begin
                m_axi_awaddr_next = addr_reg;
                m_axi_awlen_next = 8'(output_cycle_count_next);
                // the first burst of an operation is only issued once input data is available
                m_axi_awvalid_next = s_axis_wr_data.tvalid || !first_cycle_reg;

                if (m_axi_awvalid_next) begin
                    addr_next = addr_reg + AXI_ADDR_W'(tr_count_next);
                    op_count_next = op_count_reg - LEN_W'(tr_count_next);

                    s_axis_wr_data_tready_next = m_axi_wready_int && input_active_next;

                    inc_active = 1'b1;

                    state_next = STATE_WRITE;
                end else begin
                    state_next = STATE_START;
                end
            end else begin
                state_next = STATE_START;
            end
        end
        STATE_WRITE: begin
            s_axis_wr_data_tready_next = m_axi_wready_int && (last_transfer_reg || input_active_reg) && shift_axis_input_tready;

            // advance when an input beat is accepted, when this cycle needs no new
            // input, or when the shift stage is emitting from the save register
            if ((s_axis_wr_data.tready && shift_axis_tvalid) || (!input_active_reg && !last_transfer_reg) || !shift_axis_input_tready) begin
                if (s_axis_wr_data.tready && s_axis_wr_data.tvalid) begin
                    transfer_in_save = 1'b1;

                    axis_id_next = s_axis_wr_data.tid;
                    axis_dest_next = s_axis_wr_data.tdest;
                    axis_user_next = s_axis_wr_data.tuser;
                end

                // update counters
                if (first_cycle_reg) begin
                    length_next = length_reg + LEN_W'(AXIS_KEEP_W_INT - offset_reg);
                end else begin
                    length_next = length_reg + LEN_W'(AXIS_KEEP_W_INT);
                end
                if (input_active_reg) begin
                    input_cycle_count_next = input_cycle_count_reg - 1;
                    input_active_next = input_cycle_count_reg > 0;
                end
                input_last_cycle_next = input_cycle_count_next == 0;
                output_cycle_count_next = output_cycle_count_reg - 1;
                output_last_cycle_next = output_cycle_count_next == 0;
                first_cycle_next = 1'b0;
                strb_offset_mask_next = '1;

                m_axi_wdata_int = shift_axis_tdata;
                m_axi_wstrb_int = strb_offset_mask_reg;
                m_axi_wvalid_int = 1'b1;

                if (AXIS_LAST_EN && s_axis_wr_data.tlast) begin
                    // end of input frame
                    input_active_next = 1'b0;
                    s_axis_wr_data_tready_next = 1'b0;
                end

                if (AXIS_LAST_EN && shift_axis_tlast) begin
                    // end of data packet

                    // cycle_size = index of the lowest enabled lane whose keep bit is clear
                    cycle_size = (OFFSET_W+1)'(AXIS_KEEP_W_INT);
                    if (AXIS_KEEP_EN) begin
                        for (integer i = AXIS_KEEP_W_INT-1; i >= 0; i = i - 1) begin
                            if ((~shift_axis_tkeep & strb_offset_mask_reg & (1 << i)) != 0) begin
                                cycle_size = (OFFSET_W+1)'(i);
                            end
                        end
                    end

                    if (output_last_cycle_reg) begin
                        m_axi_wlast_int = 1'b1;

                        // no more data to transfer, finish operation
                        if (last_transfer_reg && last_cycle_offset_reg > 0) begin
                            if (AXIS_KEEP_EN && (shift_axis_tkeep & ~({AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg))) == 0) begin
                                // input frame ends at or before the descriptor length
                                m_axi_wstrb_int = strb_offset_mask_reg & shift_axis_tkeep;
                                if (first_cycle_reg) begin
                                    length_next = length_reg + LEN_W'(cycle_size - offset_reg);
                                end else begin
                                    length_next = length_reg + LEN_W'(cycle_size);
                                end
                            end else begin
                                // input frame is longer; truncate at the descriptor length
                                m_axi_wstrb_int = strb_offset_mask_reg & {AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg);
                                if (first_cycle_reg) begin
                                    length_next = length_reg + LEN_W'(last_cycle_offset_reg - offset_reg);
                                end else begin
                                    length_next = length_reg + LEN_W'(last_cycle_offset_reg);
                                end
                            end
                        end else begin
                            if (AXIS_KEEP_EN) begin
                                m_axi_wstrb_int = strb_offset_mask_reg & shift_axis_tkeep;
                                if (first_cycle_reg) begin
                                    length_next = length_reg + LEN_W'(cycle_size - offset_reg);
                                end else begin
                                    length_next = length_reg + LEN_W'(cycle_size);
                                end
                            end
                        end

                        // enqueue status FIFO entry for write completion
                        status_fifo_we = 1'b1;
                        status_fifo_wr_len = length_next;
                        status_fifo_wr_tag = tag_reg;
                        status_fifo_wr_id = axis_id_next;
                        status_fifo_wr_dest = axis_dest_next;
                        status_fifo_wr_user = axis_user_next;
                        status_fifo_wr_last = 1'b1;

                        s_axis_wr_data_tready_next = 1'b0;
                        wr_desc_req_ready_next = enable && active_count_av_reg;
                        state_next = STATE_IDLE;
                    end else begin
                        // more cycles left in burst, finish burst
                        if (AXIS_KEEP_EN) begin
                            m_axi_wstrb_int = strb_offset_mask_reg & shift_axis_tkeep;
                            if (first_cycle_reg) begin
                                length_next = length_reg + LEN_W'(cycle_size - offset_reg);
                            end else begin
                                length_next = length_reg + LEN_W'(cycle_size);
                            end
                        end

                        // enqueue status FIFO entry for write completion
                        status_fifo_we = 1'b1;
                        status_fifo_wr_len = length_next;
                        status_fifo_wr_tag = tag_reg;
                        status_fifo_wr_id = axis_id_next;
                        status_fifo_wr_dest = axis_dest_next;
                        status_fifo_wr_user = axis_user_next;
                        status_fifo_wr_last = 1'b1;

                        s_axis_wr_data_tready_next = 1'b0;
                        state_next = STATE_FINISH_BURST;
                    end
                end else if (output_last_cycle_reg) begin
                    m_axi_wlast_int = 1'b1;

                    if (op_count_reg > 0) begin
                        // current AXI transfer complete, but there is more data to transfer
                        // enqueue status FIFO entry for write completion
                        status_fifo_we = 1'b1;
                        status_fifo_wr_len = length_next;
                        status_fifo_wr_tag = tag_reg;
                        status_fifo_wr_id = axis_id_next;
                        status_fifo_wr_dest = axis_dest_next;
                        status_fifo_wr_user = axis_user_next;
                        status_fifo_wr_last = 1'b0;

                        s_axis_wr_data_tready_next = 1'b0;
                        state_next = STATE_START;
                    end else begin
                        // no more data to transfer, finish operation
                        if (last_cycle_offset_reg > 0) begin
                            m_axi_wstrb_int = strb_offset_mask_reg & {AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg);
                            if (first_cycle_reg) begin
                                length_next = length_reg + LEN_W'(last_cycle_offset_reg - offset_reg);
                            end else begin
                                length_next = length_reg + LEN_W'(last_cycle_offset_reg);
                            end
                        end

                        // enqueue status FIFO entry for write completion
                        status_fifo_we = 1'b1;
                        status_fifo_wr_len = length_next;
                        status_fifo_wr_tag = tag_reg;
                        status_fifo_wr_id = axis_id_next;
                        status_fifo_wr_dest = axis_dest_next;
                        status_fifo_wr_user = axis_user_next;
                        status_fifo_wr_last = 1'b1;

                        if (AXIS_LAST_EN) begin
                            // not at the end of packet; drop remainder
                            s_axis_wr_data_tready_next = shift_axis_input_tready;
                            state_next = STATE_DROP_DATA;
                        end else begin
                            // no framing; return to idle
                            s_axis_wr_data_tready_next = 1'b0;
                            wr_desc_req_ready_next = enable && active_count_av_reg;
                            state_next = STATE_IDLE;
                        end
                    end
                end else begin
                    s_axis_wr_data_tready_next = m_axi_wready_int && (last_transfer_reg || input_active_next) && shift_axis_input_tready;
                    state_next = STATE_WRITE;
                end
            end else begin
                state_next = STATE_WRITE;
            end
        end
        STATE_FINISH_BURST: begin
            // finish current AXI burst

            if (m_axi_wready_int) begin
                // update counters
                if (input_active_reg) begin
                    input_cycle_count_next = input_cycle_count_reg - 1;
                    input_active_next = input_cycle_count_reg > 0;
                end
                input_last_cycle_next = input_cycle_count_next == 0;
                output_cycle_count_next = output_cycle_count_reg - 1;
                output_last_cycle_next = output_cycle_count_next == 0;

                // padding beat: all strobes deasserted, so no bytes are written
                m_axi_wdata_int = '0;
                m_axi_wstrb_int = '0;
                m_axi_wvalid_int = 1'b1;

                if (output_last_cycle_reg) begin
                    // no more data to transfer, finish operation
                    m_axi_wlast_int = 1'b1;

                    s_axis_wr_data_tready_next = 1'b0;
                    wr_desc_req_ready_next = enable && active_count_av_reg;
                    state_next = STATE_IDLE;
                end else begin
                    // more cycles in AXI transfer
                    state_next = STATE_FINISH_BURST;
                end
            end else begin
                state_next = STATE_FINISH_BURST;
            end
        end
        STATE_DROP_DATA: begin
            // drop excess AXI stream data
            s_axis_wr_data_tready_next = shift_axis_input_tready;

            if (shift_axis_tvalid) begin
                if (s_axis_wr_data.tready && s_axis_wr_data.tvalid) begin
                    transfer_in_save = 1'b1;
                end

                if (shift_axis_tlast) begin
                    s_axis_wr_data_tready_next = 1'b0;
                    wr_desc_req_ready_next = enable && active_count_av_reg;
                    state_next = STATE_IDLE;
                end else begin
                    state_next = STATE_DROP_DATA;
                end
            end else begin
                state_next = STATE_DROP_DATA;
            end
        end
        default: begin
            // invalid state
            state_next = STATE_IDLE;
        end
    endcase

    if (status_fifo_rd_ptr_reg != status_fifo_wr_ptr_reg) begin
        // status FIFO not empty
        // The write response handshake must be sampled on this module's own
        // m_axi_wr port, the same port used for the bresp capture above.
        if (m_axi_wr.bready && m_axi_wr.bvalid) begin
            // got write completion, pop and return status
            wr_desc_sts_len_next = status_fifo_len[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
            wr_desc_sts_tag_next = status_fifo_tag[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
            wr_desc_sts_id_next = status_fifo_id[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
            wr_desc_sts_dest_next = status_fifo_dest[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
            wr_desc_sts_user_next = status_fifo_user[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
            if (bresp_next == AXI_RESP_SLVERR) begin
                wr_desc_sts_error_next = DMA_ERROR_AXI_WR_SLVERR;
            end else if (bresp_next == AXI_RESP_DECERR) begin
                wr_desc_sts_error_next = DMA_ERROR_AXI_WR_DECERR;
            end else begin
                wr_desc_sts_error_next = DMA_ERROR_NONE;
            end
            // status is only reported for the final burst of an operation
            wr_desc_sts_valid_next = status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
            status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg + 1;
            m_axi_bready_next = 1'b0;

            if (status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]) begin
                // operation complete; clear the accumulated error response
                bresp_next = AXI_RESP_OKAY;
            end

            dec_active = 1'b1;
        end else begin
            // wait for write completion
            m_axi_bready_next = 1'b1;
        end
    end
end
// Sequential logic for the write DMA control path.
//
// Registers the "_next" values computed by the combinational control logic,
// updates the input save register, writes the status FIFO, and tracks the
// number of bursts that have been issued but not yet acknowledged.
always_ff @(posedge clk) begin
state_reg <= state_next;
wr_desc_req_ready_reg <= wr_desc_req_ready_next;
wr_desc_sts_len_reg <= wr_desc_sts_len_next;
wr_desc_sts_tag_reg <= wr_desc_sts_tag_next;
wr_desc_sts_id_reg <= wr_desc_sts_id_next;
wr_desc_sts_dest_reg <= wr_desc_sts_dest_next;
wr_desc_sts_user_reg <= wr_desc_sts_user_next;
wr_desc_sts_error_reg <= wr_desc_sts_error_next;
wr_desc_sts_valid_reg <= wr_desc_sts_valid_next;
s_axis_wr_data_tready_reg <= s_axis_wr_data_tready_next;
m_axi_awaddr_reg <= m_axi_awaddr_next;
m_axi_awlen_reg <= m_axi_awlen_next;
m_axi_awvalid_reg <= m_axi_awvalid_next;
m_axi_bready_reg <= m_axi_bready_next;
addr_reg <= addr_next;
offset_reg <= offset_next;
strb_offset_mask_reg <= strb_offset_mask_next;
zero_offset_reg <= zero_offset_next;
last_cycle_offset_reg <= last_cycle_offset_next;
length_reg <= length_next;
op_count_reg <= op_count_next;
tr_count_reg <= tr_count_next;
input_cycle_count_reg <= input_cycle_count_next;
output_cycle_count_reg <= output_cycle_count_next;
input_active_reg <= input_active_next;
first_cycle_reg <= first_cycle_next;
input_last_cycle_reg <= input_last_cycle_next;
output_last_cycle_reg <= output_last_cycle_next;
last_transfer_reg <= last_transfer_next;
bresp_reg <= bresp_next;
tag_reg <= tag_next;
axis_id_reg <= axis_id_next;
axis_dest_reg <= axis_dest_next;
axis_user_reg <= axis_user_next;
// datapath
// flush takes priority over capture; a flush clears keep/last/extra-cycle
// but leaves the saved data untouched
if (flush_save) begin
save_axis_tkeep_reg <= '0;
save_axis_tlast_reg <= 1'b0;
shift_axis_extra_cycle_reg <= 1'b0;
end else if (transfer_in_save) begin
save_axis_tdata_reg <= s_axis_wr_data.tdata;
// without tkeep support every byte of the saved beat is treated as valid
save_axis_tkeep_reg <= AXIS_KEEP_EN ? s_axis_wr_data.tkeep : '1;
save_axis_tlast_reg <= s_axis_wr_data.tlast;
// an extra output cycle is needed if the last beat has valid bytes at or
// above lane (AXIS_KEEP_W_INT - offset_reg)
shift_axis_extra_cycle_reg <= s_axis_wr_data.tlast & ((s_axis_wr_data.tkeep >> ((OFFSET_W+1)'(AXIS_KEEP_W_INT) - offset_reg)) != 0);
end
// status FIFO write: one entry per completed burst
if (status_fifo_we) begin
status_fifo_len[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_len;
status_fifo_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_tag;
status_fifo_id[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_id;
status_fifo_dest[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_dest;
status_fifo_user[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_user;
status_fifo_last[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_last;
status_fifo_wr_ptr_reg <= status_fifo_wr_ptr_reg + 1;
end
status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next;
// outstanding burst counter: incremented when a burst is issued
// (inc_active), decremented when its response is received (dec_active);
// simultaneous inc and dec leave the count unchanged.
// active_count_av_reg is the registered "room for another burst" flag,
// with the limit being 2**STATUS_FIFO_AW outstanding bursts
if (active_count_reg < 2**STATUS_FIFO_AW && inc_active && !dec_active) begin
active_count_reg <= active_count_reg + 1;
active_count_av_reg <= active_count_reg < (2**STATUS_FIFO_AW-1);
end else if (active_count_reg > 0 && !inc_active && dec_active) begin
active_count_reg <= active_count_reg - 1;
active_count_av_reg <= 1'b1;
end else begin
active_count_av_reg <= active_count_reg < 2**STATUS_FIFO_AW;
end
// synchronous reset; placed last so it overrides the assignments above.
// Only control state is reset, data registers are left as they are.
if (rst) begin
state_reg <= STATE_IDLE;
wr_desc_req_ready_reg <= 1'b0;
wr_desc_sts_valid_reg <= 1'b0;
s_axis_wr_data_tready_reg <= 1'b0;
m_axi_awvalid_reg <= 1'b0;
m_axi_bready_reg <= 1'b0;
bresp_reg <= AXI_RESP_OKAY;
save_axis_tlast_reg <= 1'b0;
shift_axis_extra_cycle_reg <= 1'b0;
status_fifo_wr_ptr_reg <= 0;
status_fifo_rd_ptr_reg <= 0;
active_count_reg <= 0;
active_count_av_reg <= 1'b1;
end
end
// output datapath logic
//
// Small FIFO between the control logic (m_axi_w*_int) and the registered AXI
// W channel outputs. Backpressure towards the control logic is raised once
// the FIFO is at least half full, using a registered flag.

// AXI W channel output registers
logic [AXI_DATA_W-1:0] m_axi_wdata_reg = '0;
logic [AXI_STRB_W-1:0] m_axi_wstrb_reg = '0;
logic                  m_axi_wlast_reg = 1'b0;
logic                  m_axi_wvalid_reg = 1'b0;

// FIFO storage
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXI_DATA_W-1:0] out_fifo_wdata[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [AXI_STRB_W-1:0] out_fifo_wstrb[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic                  out_fifo_wlast[2**OUTPUT_FIFO_AW];

// FIFO pointers carry one extra bit to tell full from empty
logic [OUTPUT_FIFO_AW:0] out_fifo_wr_ptr_reg = '0;
logic [OUTPUT_FIFO_AW:0] out_fifo_rd_ptr_reg = '0;
logic out_fifo_half_full_reg = 1'b0;

// FIFO status
wire [OUTPUT_FIFO_AW:0] out_fifo_level = out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg;
wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}});
wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg;

// push when the control logic presents a beat and there is room;
// pop when an entry is available and the output register is free or being read
wire out_fifo_push = m_axi_wvalid_int && !out_fifo_full;
wire out_fifo_pop = !out_fifo_empty && (!m_axi_wvalid_reg || m_axi_wr.wready);

assign m_axi_wready_int = !out_fifo_half_full_reg;

assign m_axi_wr.wdata = m_axi_wdata_reg;
assign m_axi_wr.wstrb = m_axi_wstrb_reg;
assign m_axi_wr.wlast = m_axi_wlast_reg;
assign m_axi_wr.wvalid = m_axi_wvalid_reg;

always_ff @(posedge clk) begin
    // default: drop valid once the current beat has been accepted
    m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wr.wready;

    out_fifo_half_full_reg <= out_fifo_level >= 2**(OUTPUT_FIFO_AW-1);

    if (out_fifo_push) begin
        out_fifo_wdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wdata_int;
        out_fifo_wstrb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wstrb_int;
        out_fifo_wlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wlast_int;
        out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1;
    end

    if (out_fifo_pop) begin
        m_axi_wdata_reg <= out_fifo_wdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axi_wstrb_reg <= out_fifo_wstrb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axi_wlast_reg <= out_fifo_wlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
        m_axi_wvalid_reg <= 1'b1;
        out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1;
    end

    // synchronous reset, overrides the assignments above
    if (rst) begin
        out_fifo_wr_ptr_reg <= '0;
        out_fifo_rd_ptr_reg <= '0;
        m_axi_wvalid_reg <= 1'b0;
    end
end
endmodule
`resetall

View File

@@ -0,0 +1,67 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb simulation makefile for the AXI DMA testbench.
# SIM and WAVES use ?= so they can be overridden from the command line or
# the environment.
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
# source locations, relative to this makefile
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src
# the testbench module and the cocotb Python module are both named test_$(DUT)
DUT = taxi_axi_dma
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# MODULE and TOPLEVEL mirror the COCOTB_-prefixed variables above
# (presumably for cocotb versions that use the older names - TODO confirm)
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).f
# handle file list files
# process_f_file:  read a .f file and resolve each entry relative to the
#                  directory of that .f file
# process_f_files: expand every .f entry in a list (recursively); other
#                  entries are passed through unchanged
# uniq_base:       remove entries with duplicate base names, keeping the
#                  last occurrence of each
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
# Every PARAM_* variable is exported to the environment and is also passed
# to the simulator as a top-level parameter override (see below).
export PARAM_AXI_DATA_W := 32
export PARAM_AXI_ADDR_W := 16
export PARAM_AXI_STRB_W := $(shell expr $(PARAM_AXI_DATA_W) / 8 )
export PARAM_AXI_ID_W := 8
export PARAM_AXI_MAX_BURST_LEN := 16
export PARAM_AXIS_DATA_W := $(PARAM_AXI_DATA_W)
export PARAM_AXIS_KEEP_EN := $(shell expr $(PARAM_AXIS_DATA_W) \> 8 )
export PARAM_AXIS_KEEP_W := $(shell expr $(PARAM_AXIS_DATA_W) / 8 )
export PARAM_AXIS_LAST_EN := 1
export PARAM_AXIS_ID_EN := 1
export PARAM_AXIS_ID_W := 8
export PARAM_AXIS_DEST_EN := 0
export PARAM_AXIS_DEST_W := 8
export PARAM_AXIS_USER_EN := 1
export PARAM_AXIS_USER_W := 1
export PARAM_LEN_W := 20
export PARAM_TAG_W := 8
export PARAM_UNALIGNED_EN := 1
# simulator-specific options: build one parameter override argument per
# PARAM_* variable (-P <toplevel>.<name>=<value> for icarus,
# -G<name>=<value> for verilator)
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
# FST waveform tracing is only enabled on request (WAVES=1)
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
# pull in the cocotb simulator makefile
include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1,284 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge
from cocotb.regression import TestFactory
from cocotbext.axi import AxiBus, AxiRam
from cocotbext.axi import AxiStreamBus, AxiStreamFrame, AxiStreamSource, AxiStreamSink
from cocotbext.axi.stream import define_stream
# Stream endpoint classes for the DMA descriptor request channel
# (handshake on req_valid/req_ready; req_id/req_dest/req_user are optional).
DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc",
signals=["req_src_addr", "req_dst_addr", "req_len", "req_tag", "req_valid", "req_ready"],
optional_signals=["req_id", "req_dest", "req_user"]
)
# Stream endpoint classes for the DMA descriptor status channel
# (no ready signal is listed; sts_len/sts_id/sts_dest/sts_user are optional).
DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus",
signals=["sts_tag", "sts_error", "sts_valid"],
optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"]
)
class TB:
    """Testbench harness that attaches cocotb models to the DMA test module.

    Starts a 10 ns clock and creates descriptor sources, status sinks, AXI
    stream endpoints and a 64 KiB AXI RAM model bound to the DUT's interfaces.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.DEBUG)

        cocotb.start_soon(Clock(dut.clk, 10, units="ns").start())

        # read interface
        self.read_desc_source = DescSource(
            DescBus.from_entity(dut.rd_desc), dut.clk, dut.rst)
        self.read_desc_status_sink = DescStatusSink(
            DescStatusBus.from_entity(dut.rd_desc), dut.clk, dut.rst)
        self.read_data_sink = AxiStreamSink(
            AxiStreamBus.from_entity(dut.m_axis_rd_data), dut.clk, dut.rst)

        # write interface
        self.write_desc_source = DescSource(
            DescBus.from_entity(dut.wr_desc), dut.clk, dut.rst)
        self.write_desc_status_sink = DescStatusSink(
            DescStatusBus.from_entity(dut.wr_desc), dut.clk, dut.rst)
        self.write_data_source = AxiStreamSource(
            AxiStreamBus.from_entity(dut.s_axis_wr_data), dut.clk, dut.rst)

        # AXI interface
        self.axi_ram = AxiRam(
            AxiBus.from_entity(dut.m_axi), dut.clk, dut.rst, size=2**16)

        # configuration inputs start out deasserted
        for control in (dut.read_enable, dut.write_enable, dut.write_abort):
            control.setimmediatevalue(0)

    def set_idle_generator(self, generator=None):
        """Install a fresh pause generator on every endpoint that drives the DUT."""
        if not generator:
            return
        for endpoint in (
            self.write_desc_source,
            self.write_data_source,
            self.read_desc_source,
            self.axi_ram.write_if.b_channel,
            self.axi_ram.read_if.r_channel,
        ):
            endpoint.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install a fresh pause generator on every endpoint that receives from the DUT."""
        if not generator:
            return
        for endpoint in (
            self.read_data_sink,
            self.axi_ram.write_if.aw_channel,
            self.axi_ram.write_if.w_channel,
            self.axi_ram.read_if.ar_channel,
        ):
            endpoint.set_pause_generator(generator())

    async def cycle_reset(self):
        """Pulse rst high for two clock edges, with two edges before and after."""
        clk = self.dut.clk
        rst = self.dut.rst

        rst.setimmediatevalue(0)
        for level in (1, 0):
            await RisingEdge(clk)
            await RisingEdge(clk)
            rst.value = level
        await RisingEdge(clk)
        await RisingEdge(clk)
async def run_test_write(dut, data_in=None, idle_inserter=None, backpressure_inserter=None):
    """Exercise the DMA write path over a sweep of lengths, offsets and frame sizes.

    For each case a write descriptor of ``length`` bytes is issued while the
    input frame carries ``length + diff`` bytes. The reported status length
    and the RAM contents must match the shorter of the two, and the 8 bytes
    on either side of the written region must keep their 0xAA fill.
    """
    tb = TB(dut)

    byte_lanes = tb.axi_ram.write_if.byte_lanes
    step_size = 1 if int(dut.UNALIGNED_EN.value) else byte_lanes
    tag_count = 2**len(tb.write_desc_source.bus.req_tag)

    cur_tag = 1

    await tb.cycle_reset()

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    dut.write_enable.value = 1

    lengths = list(range(1, byte_lanes*4+1))+[128]
    # offsets at the start of a 4 KB page and just before the end of one
    offsets = list(range(0, byte_lanes*2, step_size))+list(range(4096-byte_lanes*2, 4096, step_size))
    diffs = [-8, -2, -1, 0, 1, 2, 8]
    guard = b'\xaa'*8

    for length, offset, diff in itertools.product(lengths, offsets, diffs):
        if length+diff < 1:
            continue

        tb.log.info("length %d, offset %d, diff %d", length, offset, diff)
        addr = offset+0x1000
        test_data = bytearray([x % 256 for x in range(length)])
        test_data2 = bytearray([x % 256 for x in range(length+diff)])

        # the shorter of descriptor length and frame length is what gets written
        expected = test_data if len(test_data) <= len(test_data2) else test_data2

        tb.axi_ram.write(addr-128, b'\xaa'*(len(test_data)+256))

        desc = DescTransaction(req_dst_addr=addr, req_len=len(test_data), req_tag=cur_tag)
        await tb.write_desc_source.send(desc)

        await tb.write_data_source.send(AxiStreamFrame(test_data2, tid=cur_tag))

        status = await tb.write_desc_status_sink.recv()

        tb.log.info("status: %s", status)

        assert int(status.sts_len) == len(expected)
        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_id) == cur_tag
        assert int(status.sts_error) == 0

        tb.log.debug("%s", tb.axi_ram.hexdump_str((addr & ~0xf)-16, (((addr & 0xf)+length-1) & ~0xf)+48))

        assert tb.axi_ram.read(addr-8, len(expected)+16) == guard+expected+guard

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
async def run_test_read(dut, data_in=None, idle_inserter=None, backpressure_inserter=None):
    """Exercise the DMA read path over a sweep of lengths and address offsets.

    For each case the RAM is filled with a known pattern, a read descriptor
    is issued, and the returned status tag/error and the received frame's
    data and tid are checked.
    """
    tb = TB(dut)

    byte_lanes = tb.axi_ram.read_if.byte_lanes
    step_size = 1 if int(dut.UNALIGNED_EN.value) else byte_lanes
    tag_count = 2**len(tb.read_desc_source.bus.req_tag)

    cur_tag = 1

    await tb.cycle_reset()

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    dut.read_enable.value = 1

    lengths = list(range(1, byte_lanes*4+1))+[128]
    # offsets at the start of a 4 KB page and just before the end of one
    offsets = list(range(0, byte_lanes*2, step_size))+list(range(4096-byte_lanes*2, 4096, step_size))

    for length, offset in itertools.product(lengths, offsets):
        tb.log.info("length %d, offset %d", length, offset)
        addr = offset+0x1000
        test_data = bytearray([x % 256 for x in range(length)])

        # surround the test pattern with 0xAA fill
        tb.axi_ram.write(addr-128, b'\xaa'*(len(test_data)+256))
        tb.axi_ram.write(addr, test_data)

        tb.log.debug("%s", tb.axi_ram.hexdump_str((addr & ~0xf)-16, (((addr & 0xf)+length-1) & ~0xf)+48))

        desc = DescTransaction(req_src_addr=addr, req_len=len(test_data), req_tag=cur_tag, req_id=cur_tag)
        await tb.read_desc_source.send(desc)

        status = await tb.read_desc_status_sink.recv()

        read_data = await tb.read_data_sink.recv()

        tb.log.info("status: %s", status)
        tb.log.info("read_data: %s", read_data)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) == 0
        assert read_data.tdata == test_data
        assert read_data.tid == cur_tag

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless iterator repeating the pattern 1, 1, 1, 0."""
    pattern = [1, 1, 1, 0]
    return itertools.cycle(pattern)
# Only register cocotb tests when running inside a simulation
# (cocotb.top is set); under pytest this block is skipped.
if getattr(cocotb, 'top', None) is not None:

    for test in (run_test_write, run_test_read):
        # each test is generated with and without idle cycles / backpressure
        factory = TestFactory(test)
        for option in ("idle_inserter", "backpressure_inserter"):
            factory.add_option(option, [None, cycle_pause])
        factory.generate_tests()
# cocotb-test
# Absolute paths used by the pytest entry point, derived from this file's
# location: rtl and lib are two directory levels above the test directory.
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
def process_f_files(files):
    """Expand ``.f`` file lists into a flat list of source paths.

    Entries ending in ``.f`` (case-insensitive) are read as whitespace-separated
    lists of paths relative to the list file's directory and are expanded
    recursively. Paths are de-duplicated by base name: a later path replaces an
    earlier one with the same base name but keeps the earlier position.
    """
    by_name = {}
    for path in files:
        if path[-2:].lower() != '.f':
            by_name[os.path.basename(path)] = path
            continue

        with open(path, 'r') as list_file:
            entries = list_file.read().split()

        base_dir = os.path.dirname(path)
        expanded = process_f_files([os.path.join(base_dir, entry) for entry in entries])
        by_name.update((os.path.basename(item), item) for item in expanded)

    return list(by_name.values())
@pytest.mark.parametrize("axi_data_w", [8, 16, 32])
@pytest.mark.parametrize("unaligned", [0, 1])
def test_taxi_axi_dma(request, axi_data_w, unaligned):
    """Build and run the cocotb testbench under Verilator for one parameter set.

    The AXI stream width follows the AXI data width; each parametrized case
    gets its own build directory named after the pytest node.
    """
    dut = "taxi_axi_dma"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    verilog_sources = process_f_files([
        os.path.join(tests_dir, f"{toplevel}.sv"),
        os.path.join(rtl_dir, f"{dut}.f"),
    ])

    axis_data_w = axi_data_w

    parameters = {
        'AXI_DATA_W': axi_data_w,
        'AXI_ADDR_W': 16,
        'AXI_STRB_W': axi_data_w // 8,
        'AXI_ID_W': 8,
        'AXI_MAX_BURST_LEN': 16,
        'AXIS_DATA_W': axis_data_w,
        # tkeep is only enabled for streams wider than one byte
        'AXIS_KEEP_EN': int(axis_data_w > 8),
        'AXIS_KEEP_W': axis_data_w // 8,
        'AXIS_LAST_EN': 1,
        'AXIS_ID_EN': 1,
        'AXIS_ID_W': 8,
        'AXIS_DEST_EN': 0,
        'AXIS_DEST_W': 8,
        'AXIS_USER_EN': 1,
        'AXIS_USER_W': 1,
        'LEN_W': 20,
        'TAG_W': 8,
        'UNALIGNED_EN': unaligned,
    }

    # expose the parameters to the simulation as PARAM_* environment variables
    extra_env = {f'PARAM_{name}': str(value) for name, value in parameters.items()}

    build_name = request.node.name.replace('[', '-').replace(']', '')
    sim_build = os.path.join(tests_dir, "sim_build", build_name)

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,140 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * AXI4 DMA testbench
 *
 * Structural wrapper around a single taxi_axi_dma instance (uut).  It declares
 * the clock, reset, and configuration signals together with the descriptor,
 * AXI stream, and AXI interface instances, and wires them to the DMA.  This
 * module contains no stimulus of its own; all signals are left to be driven
 * and observed from outside the module.
 */
module test_taxi_axi_dma #
(
    /* verilator lint_off WIDTHTRUNC */
    // AXI memory-mapped interface
    parameter AXI_DATA_W = 32,
    parameter AXI_ADDR_W = 16,
    parameter AXI_STRB_W = AXI_DATA_W / 8,
    parameter AXI_ID_W = 8,
    parameter AXI_MAX_BURST_LEN = 16,
    // AXI stream interfaces
    parameter AXIS_DATA_W = AXI_DATA_W,
    parameter logic AXIS_KEEP_EN = AXIS_DATA_W > 8,
    parameter AXIS_KEEP_W = AXIS_DATA_W / 8,
    parameter logic AXIS_LAST_EN = 1'b1,
    parameter logic AXIS_ID_EN = 1'b1,
    parameter AXIS_ID_W = 8,
    parameter logic AXIS_DEST_EN = 1'b1,
    parameter AXIS_DEST_W = 8,
    parameter logic AXIS_USER_EN = 1'b1,
    parameter AXIS_USER_W = 8,
    // Descriptor fields
    parameter LEN_W = 20,
    parameter TAG_W = 8,
    // DMA options
    parameter logic UNALIGNED_EN = 1'b1
    /* verilator lint_on WIDTHTRUNC */
)
();

// Clock, reset, and configuration inputs of the DMA
logic clk;
logic rst;
logic read_enable;
logic write_enable;
logic write_abort;

// Read and write descriptor interfaces; the stream sideband settings
// (ID/DEST/USER) follow the AXI stream parameters
taxi_dma_desc_if #(
    .SRC_ADDR_W(AXI_ADDR_W),
    .SRC_SEL_EN(1'b0),
    .SRC_ASID_EN(1'b0),
    .DST_ADDR_W(AXI_ADDR_W),
    .DST_SEL_EN(1'b0),
    .DST_ASID_EN(1'b0),
    .IMM_EN(1'b0),
    .LEN_W(LEN_W),
    .TAG_W(TAG_W),
    .ID_EN(AXIS_ID_EN),
    .ID_W(AXIS_ID_W),
    .DEST_EN(AXIS_DEST_EN),
    .DEST_W(AXIS_DEST_W),
    .USER_EN(AXIS_USER_EN),
    .USER_W(AXIS_USER_W)
)
rd_desc(),
wr_desc();

// Stream interfaces: write data into the DMA, read data out of the DMA
taxi_axis_if #(
    .DATA_W(AXIS_DATA_W),
    .KEEP_EN(AXIS_KEEP_EN),
    .KEEP_W(AXIS_KEEP_W),
    .LAST_EN(AXIS_LAST_EN),
    .ID_EN(AXIS_ID_EN),
    .ID_W(AXIS_ID_W),
    .DEST_EN(AXIS_DEST_EN),
    .DEST_W(AXIS_DEST_W),
    .USER_EN(AXIS_USER_EN),
    .USER_W(AXIS_USER_W)
)
s_axis_wr_data(),
m_axis_rd_data();

// Single AXI interface instance shared by the DMA's write and read master ports
taxi_axi_if #(
    .DATA_W(AXI_DATA_W),
    .ADDR_W(AXI_ADDR_W),
    .STRB_W(AXI_STRB_W),
    .ID_W(AXI_ID_W),
    .AWUSER_EN(1'b0),
    .WUSER_EN(1'b0),
    .BUSER_EN(1'b0),
    .ARUSER_EN(1'b0),
    .RUSER_EN(1'b0),
    .MAX_BURST_LEN(AXI_MAX_BURST_LEN)
)
m_axi();

// Unit under test
taxi_axi_dma #(
    .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN),
    .UNALIGNED_EN(UNALIGNED_EN)
)
uut (
    .clk,
    .rst,

    /*
     * DMA read descriptor (request and status use the same interface instance)
     */
    .rd_desc_req(rd_desc),
    .rd_desc_sts(rd_desc),

    /*
     * DMA write descriptor (request and status use the same interface instance)
     */
    .wr_desc_req(wr_desc),
    .wr_desc_sts(wr_desc),

    /*
     * AXI stream read data output
     */
    .m_axis_rd_data(m_axis_rd_data),

    /*
     * AXI stream write data input
     */
    .s_axis_wr_data(s_axis_wr_data),

    /*
     * AXI4 master interface (write and read channels of the same instance)
     */
    .m_axi_wr(m_axi),
    .m_axi_rd(m_axi),

    /*
     * Configuration
     */
    .read_enable,
    .write_enable,
    .write_abort
);

endmodule
`resetall