dma: Add AXI central DMA module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-11-03 11:42:04 -08:00
parent 999602cf11
commit 9442bb7fbb
5 changed files with 1093 additions and 0 deletions

View File

@@ -63,6 +63,7 @@ To facilitate the dual-license model, contributions to the project can only be a
* Direct Memory Access
* SV interface for segmented RAM
* SV interface for DMA descriptors
* AXI central DMA
* Segmented SDP RAM
* Segmented dual-clock SDP RAM
* Ethernet

View File

@@ -0,0 +1,736 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2018-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4 Central DMA
*/
// AXI4 central DMA: accepts copy descriptors (source address, destination
// address, length, tag) on desc_req, issues AXI read bursts on m_axi_rd and
// AXI write bursts on m_axi_wr, and reports a status word on desc_sts.
module taxi_axi_cdma #
(
// Maximum AXI burst length to generate
// (the effective limit is the smaller of this and m_axi_wr.MAX_BURST_LEN)
parameter AXI_MAX_BURST_LEN = 256,
// Enable support for unaligned transfers
// (when disabled, descriptor addresses are masked with ADDR_MASK on load)
parameter logic UNALIGNED_EN = 1'b1
)
(
// clock; all registers in this module update on its rising edge
input wire logic clk,
// synchronous reset, sampled inside the clocked blocks
input wire logic rst,
/*
* DMA descriptor
*/
// descriptor request input (req_src_addr, req_dst_addr, req_len, req_tag, valid/ready)
taxi_dma_desc_if.req_snk desc_req,
// descriptor status output (sts_tag, sts_error, sts_valid; other fields tied to zero)
taxi_dma_desc_if.sts_src desc_sts,
/*
* AXI4 master interface
*/
// write channels (AW, W, B)
taxi_axi_if.wr_mst m_axi_wr,
// read channels (AR, R)
taxi_axi_if.rd_mst m_axi_rd,
/*
* Configuration
*/
// gates acceptance of new descriptors (used in desc_req_ready_next)
input wire logic enable
);
// extract parameters
// (bus geometry is taken from the write interface; descriptor field widths
// are taken from the request interface)
localparam AXI_DATA_W = m_axi_wr.DATA_W;
localparam AXI_ADDR_W = m_axi_wr.ADDR_W;
localparam AXI_STRB_W = m_axi_wr.STRB_W;
localparam AXI_ID_W = m_axi_wr.ID_W;
// effective burst length limit: minimum of the module parameter and the interface limit
localparam AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_wr.MAX_BURST_LEN ? AXI_MAX_BURST_LEN : m_axi_wr.MAX_BURST_LEN;
localparam LEN_W = desc_req.LEN_W;
localparam TAG_W = desc_req.TAG_W;
// one byte lane per strobe bit
localparam AXI_BYTE_LANES = AXI_STRB_W;
// bits per byte lane
localparam AXI_BYTE_SIZE = AXI_DATA_W/AXI_BYTE_LANES;
// log2(bytes per beat); drives arsize/awsize
localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W);
// largest burst in bytes (beats shifted up by bytes-per-beat)
localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN_INT << AXI_BURST_SIZE;
// width/mask of the byte offset within one bus word (width forced to 1 for a 1-byte bus)
localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1;
localparam OFFSET_MASK = AXI_STRB_W > 1 ? {OFFSET_W{1'b1}} : 0;
// clears the intra-word offset bits of an address
localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W);
// width of the beat counters: 13-bit byte counts shifted down by AXI_BURST_SIZE
localparam CYCLE_CNT_W = 13 - AXI_BURST_SIZE;
// log2 depths of the status FIFO and the write data output FIFO
localparam STATUS_FIFO_AW = 5;
localparam OUTPUT_FIFO_AW = 5;
// check configuration
// (elaboration-time checks; each aborts with $fatal on an unsupported configuration)
if (AXI_BYTE_SIZE * AXI_STRB_W != AXI_DATA_W)
$fatal(0, "Error: AXI data width not evenly divisible (instance %m)");
if (2**$clog2(AXI_BYTE_LANES) != AXI_BYTE_LANES)
$fatal(0, "Error: AXI word width must be even power of two (instance %m)");
if (AXI_MAX_BURST_LEN_INT < 1 || AXI_MAX_BURST_LEN_INT > 256)
$fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)");
// descriptor addresses are assigned into AXI_ADDR_W-wide registers, so they must be at least that wide
if (desc_req.SRC_ADDR_W < AXI_ADDR_W || desc_req.DST_ADDR_W < AXI_ADDR_W)
$fatal(0, "Error: Descriptor address width is not sufficient (instance %m)");
// AXI response codes, compared against rresp/bresp
localparam logic [1:0]
AXI_RESP_OKAY = 2'b00,
AXI_RESP_EXOKAY = 2'b01,
AXI_RESP_SLVERR = 2'b10,
AXI_RESP_DECERR = 2'b11;
// DMA status error codes reported on desc_sts.sts_error.
// Only NONE and the four AXI_RD/AXI_WR codes are assigned by this module.
localparam logic [3:0]
DMA_ERROR_NONE = 4'd0,
DMA_ERROR_TIMEOUT = 4'd1,
DMA_ERROR_PARITY = 4'd2,
DMA_ERROR_AXI_RD_SLVERR = 4'd4,
DMA_ERROR_AXI_RD_DECERR = 4'd5,
DMA_ERROR_AXI_WR_SLVERR = 4'd6,
DMA_ERROR_AXI_WR_DECERR = 4'd7,
DMA_ERROR_PCIE_FLR = 4'd8,
DMA_ERROR_PCIE_CPL_POISONED = 4'd9,
DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10,
DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11;
// Read-side FSM states:
//   IDLE  - wait for and load a descriptor
//   START - compute the size of the next write burst and post a command to the write side
//   REQ   - issue AXI read requests covering that write burst
localparam logic [1:0]
READ_STATE_IDLE = 2'd0,
READ_STATE_START = 2'd1,
READ_STATE_REQ = 2'd2;
logic [1:0] read_state_reg = READ_STATE_IDLE, read_state_next;
// Write-side FSM states:
//   IDLE  - wait for a command, then issue the AXI write address
//   WRITE - move read data to the write data path
localparam logic [0:0]
AXI_STATE_IDLE = 1'd0,
AXI_STATE_WRITE = 1'd1;
logic [0:0] axi_state_reg = AXI_STATE_IDLE, axi_state_next;
// datapath control signals
// transfer_in_save: capture the current read beat into save_axi_rdata_reg
// axi_cmd_ready:    write side accepts the pending axi_cmd_* command
// status_fifo_we:   push one entry into the status FIFO
logic transfer_in_save;
logic axi_cmd_ready;
logic status_fifo_we;
// current read and write byte addresses for the descriptor in progress
logic [AXI_ADDR_W-1:0] read_addr_reg = '0, read_addr_next;
logic [AXI_ADDR_W-1:0] write_addr_reg = '0, write_addr_next;
// byte counters: op_count = bytes left in the descriptor, axi_count = bytes left
// to read for the current write burst, tr_count = bytes in the current read burst
logic [LEN_W-1:0] op_count_reg = '0, op_count_next;
logic [12:0] tr_count_reg = '0, tr_count_next;
logic [12:0] axi_count_reg = '0, axi_count_next;
// command handed from the read-side FSM to the write-side FSM (one per write burst)
logic [AXI_ADDR_W-1:0] axi_cmd_addr_reg = '0, axi_cmd_addr_next;
logic [OFFSET_W-1:0] axi_cmd_offset_reg = '0, axi_cmd_offset_next;
logic [OFFSET_W-1:0] axi_cmd_first_cycle_offset_reg = '0, axi_cmd_first_cycle_offset_next;
logic [OFFSET_W-1:0] axi_cmd_last_cycle_offset_reg = '0, axi_cmd_last_cycle_offset_next;
logic [CYCLE_CNT_W-1:0] axi_cmd_input_cycle_count_reg = '0, axi_cmd_input_cycle_count_next;
logic [CYCLE_CNT_W-1:0] axi_cmd_output_cycle_count_reg = '0, axi_cmd_output_cycle_count_next;
logic axi_cmd_bubble_cycle_reg = 1'b0, axi_cmd_bubble_cycle_next;
logic axi_cmd_last_transfer_reg = 1'b0, axi_cmd_last_transfer_next;
logic [TAG_W-1:0] axi_cmd_tag_reg = '0, axi_cmd_tag_next;
logic axi_cmd_valid_reg = 1'b0, axi_cmd_valid_next;
// write-side working copies of the command, loaded while the write FSM is idle
logic [OFFSET_W-1:0] offset_reg = '0, offset_next;
logic [OFFSET_W-1:0] first_cycle_offset_reg = '0, first_cycle_offset_next;
logic [OFFSET_W-1:0] last_cycle_offset_reg = '0, last_cycle_offset_next;
logic [CYCLE_CNT_W-1:0] input_cycle_count_reg = '0, input_cycle_count_next;
logic [CYCLE_CNT_W-1:0] output_cycle_count_reg = '0, output_cycle_count_next;
logic input_active_reg = 1'b0, input_active_next;
logic output_active_reg = 1'b0, output_active_next;
logic bubble_cycle_reg = 1'b0, bubble_cycle_next;
logic first_input_cycle_reg = 1'b0, first_input_cycle_next;
logic first_output_cycle_reg = 1'b0, first_output_cycle_next;
logic output_last_cycle_reg = 1'b0, output_last_cycle_next;
logic last_transfer_reg = 1'b0, last_transfer_next;
// sticky copies of the most recent error response seen on R and B
logic [1:0] rresp_reg = AXI_RESP_OKAY, rresp_next;
logic [1:0] bresp_reg = AXI_RESP_OKAY, bresp_next;
logic [TAG_W-1:0] tag_reg = '0, tag_next;
// status FIFO: one entry (tag, read response, last flag) per write burst,
// pushed when the last write beat is generated and popped on the B response.
// Pointers carry one extra bit beyond the address width.
logic [STATUS_FIFO_AW+1-1:0] status_fifo_wr_ptr_reg = '0;
logic [STATUS_FIFO_AW+1-1:0] status_fifo_rd_ptr_reg = '0, status_fifo_rd_ptr_next;
logic [TAG_W-1:0] status_fifo_tag[2**STATUS_FIFO_AW];
logic [1:0] status_fifo_resp[2**STATUS_FIFO_AW];
logic status_fifo_last[2**STATUS_FIFO_AW];
logic [TAG_W-1:0] status_fifo_wr_tag;
logic [1:0] status_fifo_wr_resp;
logic status_fifo_wr_last;
// count of write bursts posted but not yet completed; active_count_av_reg
// is the registered "room available" flag used to throttle the read-side FSM
logic [STATUS_FIFO_AW+1-1:0] active_count_reg = '0;
logic active_count_av_reg = 1'b1;
logic inc_active;
logic dec_active;
// registered descriptor interface outputs
logic desc_req_ready_reg = 1'b0, desc_req_ready_next;
logic [TAG_W-1:0] desc_sts_tag_reg = '0, desc_sts_tag_next;
logic [3:0] desc_sts_error_reg = 4'd0, desc_sts_error_next;
logic desc_sts_valid_reg = 1'b0, desc_sts_valid_next;
// registered AXI read address / read data handshake outputs
logic [AXI_ADDR_W-1:0] m_axi_araddr_reg = '0, m_axi_araddr_next;
logic [7:0] m_axi_arlen_reg = 8'd0, m_axi_arlen_next;
logic m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next;
logic m_axi_rready_reg = 1'b0, m_axi_rready_next;
// registered AXI write address / write response handshake outputs
logic [AXI_ADDR_W-1:0] m_axi_awaddr_reg = '0, m_axi_awaddr_next;
logic [7:0] m_axi_awlen_reg = 8'd0, m_axi_awlen_next;
logic m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next;
logic m_axi_bready_reg = 1'b0, m_axi_bready_next;
// previous read beat, kept so a write word can be assembled from two read beats
logic [AXI_DATA_W-1:0] save_axi_rdata_reg = '0;
// {current beat, saved beat}; shift_axi_rdata selects an AXI_DATA_W-wide window
// starting (AXI_STRB_W - offset_reg) byte lanes into this double-width word
wire [AXI_DATA_W*2-1:0] axi_rdata_full = {m_axi_rd.rdata, save_axi_rdata_reg};
wire [AXI_DATA_W-1:0] shift_axi_rdata = axi_rdata_full[(OFFSET_W+1)'(AXI_STRB_W-offset_reg)*AXI_BYTE_SIZE +: AXI_DATA_W];
// internal datapath
// (write data stream feeding the output FIFO; m_axi_wready_int is its backpressure)
logic [AXI_DATA_W-1:0] m_axi_wdata_int;
logic [AXI_STRB_W-1:0] m_axi_wstrb_int;
logic m_axi_wlast_int;
logic m_axi_wvalid_int;
wire m_axi_wready_int;
// descriptor interface outputs; only tag, error and valid carry information,
// the remaining status fields are tied to zero
assign desc_req.req_ready = desc_req_ready_reg;
assign desc_sts.sts_len = '0;
assign desc_sts.sts_tag = desc_sts_tag_reg;
assign desc_sts.sts_id = '0;
assign desc_sts.sts_dest = '0;
assign desc_sts.sts_user = '0;
assign desc_sts.sts_error = desc_sts_error_reg;
assign desc_sts.sts_valid = desc_sts_valid_reg;
// AXI read address channel: fixed ID 0, full-width beats (arsize), burst type 2'b01
assign m_axi_rd.arid = '0;
assign m_axi_rd.araddr = m_axi_araddr_reg;
assign m_axi_rd.arlen = m_axi_arlen_reg;
assign m_axi_rd.arsize = 3'(AXI_BURST_SIZE);
assign m_axi_rd.arburst = 2'b01;
assign m_axi_rd.arlock = 1'b0;
assign m_axi_rd.arcache = 4'b0011;
assign m_axi_rd.arprot = 3'b010;
assign m_axi_rd.arvalid = m_axi_arvalid_reg;
assign m_axi_rd.rready = m_axi_rready_reg;
// AXI write address channel: same fixed attributes as the read side
assign m_axi_wr.awid = '0;
assign m_axi_wr.awaddr = m_axi_awaddr_reg;
assign m_axi_wr.awlen = m_axi_awlen_reg;
assign m_axi_wr.awsize = 3'(AXI_BURST_SIZE);
assign m_axi_wr.awburst = 2'b01;
assign m_axi_wr.awlock = 1'b0;
assign m_axi_wr.awcache = 4'b0011;
assign m_axi_wr.awprot = 3'b010;
assign m_axi_wr.awvalid = m_axi_awvalid_reg;
assign m_axi_wr.bready = m_axi_bready_reg;
// Read-side control (combinational next-state logic).
// Loads a descriptor, splits it into write bursts (one axi_cmd_* command each,
// handed to the write-side FSM), and for every write burst issues the AXI read
// requests that supply its data. Both splits respect the maximum burst size
// and never let a single burst cross a 4k address boundary.
always_comb begin
// defaults: hold registered values unless a state overrides them below
read_state_next = READ_STATE_IDLE;
desc_req_ready_next = 1'b0;
m_axi_araddr_next = m_axi_araddr_reg;
m_axi_arlen_next = m_axi_arlen_reg;
// keep arvalid asserted until the AR handshake completes
m_axi_arvalid_next = m_axi_arvalid_reg && !m_axi_rd.arready;
read_addr_next = read_addr_reg;
write_addr_next = write_addr_reg;
op_count_next = op_count_reg;
tr_count_next = tr_count_reg;
axi_count_next = axi_count_reg;
axi_cmd_addr_next = axi_cmd_addr_reg;
axi_cmd_offset_next = axi_cmd_offset_reg;
axi_cmd_first_cycle_offset_next = axi_cmd_first_cycle_offset_reg;
axi_cmd_last_cycle_offset_next = axi_cmd_last_cycle_offset_reg;
axi_cmd_input_cycle_count_next = axi_cmd_input_cycle_count_reg;
axi_cmd_output_cycle_count_next = axi_cmd_output_cycle_count_reg;
axi_cmd_bubble_cycle_next = axi_cmd_bubble_cycle_reg;
axi_cmd_last_transfer_next = axi_cmd_last_transfer_reg;
axi_cmd_tag_next = axi_cmd_tag_reg;
// keep the command valid until the write side accepts it
axi_cmd_valid_next = axi_cmd_valid_reg && !axi_cmd_ready;
inc_active = 1'b0;
case (read_state_reg)
READ_STATE_IDLE: begin
// idle state - load new descriptor to start operation
// only accept when no command is pending, the module is enabled,
// and the active-burst counter has room
desc_req_ready_next = !axi_cmd_valid_reg && enable && active_count_av_reg;
if (desc_req.req_ready && desc_req.req_valid) begin
if (UNALIGNED_EN) begin
read_addr_next = desc_req.req_src_addr;
write_addr_next = desc_req.req_dst_addr;
end else begin
// unaligned support disabled: drop the intra-word offset bits
read_addr_next = desc_req.req_src_addr & ADDR_MASK;
write_addr_next = desc_req.req_dst_addr & ADDR_MASK;
end
axi_cmd_tag_next = desc_req.req_tag;
op_count_next = desc_req.req_len;
desc_req_ready_next = 1'b0;
read_state_next = READ_STATE_START;
end else begin
read_state_next = READ_STATE_IDLE;
end
end
READ_STATE_START: begin
// start state - compute write length
// wait until the previous command has been taken and there is room
// for another outstanding write burst
if (!axi_cmd_valid_reg && active_count_av_reg) begin
if (op_count_reg <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(write_addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin
// packet smaller than max burst size
if ((12'(write_addr_reg & 12'hfff) + 12'(op_count_reg & 12'hfff)) >> 12 != 0 || op_count_reg >> 12 != 0) begin
// crosses 4k boundary
// limit this burst to the bytes remaining before the boundary
axi_count_next = 13'h1000 - 12'(write_addr_reg & 12'hfff);
end else begin
// does not cross 4k boundary
axi_count_next = 13'(op_count_reg);
end
end else begin
// packet larger than max burst size
if ((12'(write_addr_reg & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin
// crosses 4k boundary
axi_count_next = 13'h1000 - 12'(write_addr_reg & 12'hfff);
end else begin
// does not cross 4k boundary
// a full burst, less the leading bytes skipped by an unaligned start
axi_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(write_addr_reg & OFFSET_MASK);
end
end
// advance the write pointer and the remaining descriptor length
write_addr_next = write_addr_reg + AXI_ADDR_W'(axi_count_next);
op_count_next = op_count_reg - LEN_W'(axi_count_next);
axi_cmd_addr_next = write_addr_reg;
if (UNALIGNED_EN) begin
// beat counts are "beats minus one", including the leading offset on each side
axi_cmd_input_cycle_count_next = CYCLE_CNT_W'((axi_count_next + 13'(read_addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE);
axi_cmd_output_cycle_count_next = CYCLE_CNT_W'((axi_count_next + 13'(write_addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE);
// byte-lane shift from read alignment to write alignment
axi_cmd_offset_next = OFFSET_W'(write_addr_reg & OFFSET_MASK) - OFFSET_W'(read_addr_reg & OFFSET_MASK);
// read offset larger than write offset: flag a bubble cycle, in which the
// write side consumes the first read beat without producing an output beat
axi_cmd_bubble_cycle_next = OFFSET_W'(read_addr_reg & OFFSET_MASK) > OFFSET_W'(write_addr_reg & OFFSET_MASK);
axi_cmd_first_cycle_offset_next = OFFSET_W'(write_addr_reg & OFFSET_MASK);
axi_cmd_last_cycle_offset_next = axi_cmd_first_cycle_offset_next + OFFSET_W'(axi_count_next & OFFSET_MASK);
end else begin
axi_cmd_input_cycle_count_next = CYCLE_CNT_W'((axi_count_next - 13'd1) >> AXI_BURST_SIZE);
axi_cmd_output_cycle_count_next = CYCLE_CNT_W'((axi_count_next - 13'd1) >> AXI_BURST_SIZE);
axi_cmd_offset_next = '0;
axi_cmd_bubble_cycle_next = '0;
axi_cmd_first_cycle_offset_next = '0;
axi_cmd_last_cycle_offset_next = OFFSET_W'(axi_count_next & OFFSET_MASK);
end
// last write burst of the descriptor when nothing remains afterwards
axi_cmd_last_transfer_next = op_count_next == 0;
axi_cmd_valid_next = 1'b1;
// one more write burst is now outstanding
inc_active = 1'b1;
read_state_next = READ_STATE_REQ;
end else begin
read_state_next = READ_STATE_START;
end
end
READ_STATE_REQ: begin
// request state - issue AXI read requests
// only compute a new request once the previous AR has been accepted
if (!m_axi_rd.arvalid) begin
if (axi_count_reg <= 13'(AXI_MAX_BURST_SIZE) - 13'(read_addr_reg & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin
// packet smaller than max burst size
if ((12'(read_addr_reg & 12'hfff) + 12'(axi_count_reg & 12'hfff)) >> 12 != 0 || axi_count_reg >> 12 != 0) begin
// crosses 4k boundary
tr_count_next = 13'h1000 - 12'(read_addr_reg & 12'hfff);
end else begin
// does not cross 4k boundary
tr_count_next = 13'(axi_count_reg);
end
end else begin
// packet larger than max burst size
if ((12'(read_addr_reg & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin
// crosses 4k boundary
tr_count_next = 13'h1000 - 12'(read_addr_reg & 12'hfff);
end else begin
// does not cross 4k boundary
tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(read_addr_reg & OFFSET_MASK);
end
end
m_axi_araddr_next = read_addr_reg;
// arlen is "beats minus one"; the unaligned case includes the leading offset bytes
if (UNALIGNED_EN) begin
m_axi_arlen_next = 8'((tr_count_next + 13'(read_addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE);
end else begin
m_axi_arlen_next = 8'((tr_count_next - 13'd1) >> AXI_BURST_SIZE);
end
m_axi_arvalid_next = 1'b1;
read_addr_next = read_addr_reg + AXI_ADDR_W'(tr_count_next);
axi_count_next = axi_count_reg - tr_count_next;
if (axi_count_next > 0) begin
// more read data needed for the current write burst
read_state_next = READ_STATE_REQ;
end else if (op_count_next > 0) begin
// descriptor not finished: set up the next write burst
read_state_next = READ_STATE_START;
end else begin
// descriptor fully requested: return to idle, using the same accept condition as idle
desc_req_ready_next = !axi_cmd_valid_reg && enable && active_count_av_reg;
read_state_next = READ_STATE_IDLE;
end
end else begin
read_state_next = READ_STATE_REQ;
end
end
default: begin
// invalid state
read_state_next = READ_STATE_IDLE;
end
endcase
end
// Write-side control (combinational next-state logic).
// Takes one axi_cmd_* command per write burst, issues the AXI write address,
// realigns incoming read data into write beats (m_axi_w*_int), records a
// status FIFO entry per burst, and turns write responses into descriptor status.
always_comb begin
// defaults: hold registered values, deassert single-cycle strobes
axi_state_next = AXI_STATE_IDLE;
desc_sts_tag_next = desc_sts_tag_reg;
desc_sts_error_next = desc_sts_error_reg;
desc_sts_valid_next = 1'b0;
m_axi_awaddr_next = m_axi_awaddr_reg;
m_axi_awlen_next = m_axi_awlen_reg;
// keep awvalid asserted until the AW handshake completes
m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_wr.awready;
m_axi_wdata_int = shift_axi_rdata;
m_axi_wstrb_int = '0;
m_axi_wlast_int = 1'b0;
m_axi_wvalid_int = 1'b0;
m_axi_bready_next = 1'b0;
m_axi_rready_next = 1'b0;
transfer_in_save = 1'b0;
axi_cmd_ready = 1'b0;
status_fifo_we = 1'b0;
offset_next = offset_reg;
first_cycle_offset_next = first_cycle_offset_reg;
last_cycle_offset_next = last_cycle_offset_reg;
input_cycle_count_next = input_cycle_count_reg;
output_cycle_count_next = output_cycle_count_reg;
input_active_next = input_active_reg;
output_active_next = output_active_reg;
bubble_cycle_next = bubble_cycle_reg;
first_input_cycle_next = first_input_cycle_reg;
first_output_cycle_next = first_output_cycle_reg;
output_last_cycle_next = output_last_cycle_reg;
last_transfer_next = last_transfer_reg;
tag_next = tag_reg;
status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg;
dec_active = 1'b0;
// sticky error capture: latch an error read response, otherwise hold the previous value
if (m_axi_rd.rready && m_axi_rd.rvalid && (m_axi_rd.rresp == AXI_RESP_SLVERR || m_axi_rd.rresp == AXI_RESP_DECERR)) begin
rresp_next = m_axi_rd.rresp;
end else begin
rresp_next = rresp_reg;
end
// same for write responses
if (m_axi_wr.bready && m_axi_wr.bvalid && (m_axi_wr.bresp == AXI_RESP_SLVERR || m_axi_wr.bresp == AXI_RESP_DECERR)) begin
bresp_next = m_axi_wr.bresp;
end else begin
bresp_next = bresp_reg;
end
// default status FIFO write data (only stored when status_fifo_we is set)
status_fifo_wr_tag = tag_reg;
status_fifo_wr_resp = rresp_next;
status_fifo_wr_last = 1'b0;
case (axi_state_reg)
AXI_STATE_IDLE: begin
// idle state - wait for a command from the read-side FSM
m_axi_rready_next = 1'b0;
// store transfer parameters
// (copied every idle cycle; they take effect once a valid command is accepted)
if (UNALIGNED_EN) begin
offset_next = axi_cmd_offset_reg;
first_cycle_offset_next = axi_cmd_first_cycle_offset_reg;
end else begin
offset_next = 0;
first_cycle_offset_next = 0;
end
last_cycle_offset_next = axi_cmd_last_cycle_offset_reg;
input_cycle_count_next = axi_cmd_input_cycle_count_reg;
output_cycle_count_next = axi_cmd_output_cycle_count_reg;
bubble_cycle_next = axi_cmd_bubble_cycle_reg;
last_transfer_next = axi_cmd_last_transfer_reg;
tag_next = axi_cmd_tag_reg;
output_last_cycle_next = output_cycle_count_next == 0;
input_active_next = 1'b1;
output_active_next = 1'b1;
first_input_cycle_next = 1'b1;
first_output_cycle_next = 1'b1;
// start the burst once the AW channel is free and a command is pending
if (!m_axi_wr.awvalid && axi_cmd_valid_reg) begin
axi_cmd_ready = 1'b1;
m_axi_awaddr_next = axi_cmd_addr_reg;
// output cycle count is already "beats minus one", matching awlen encoding
m_axi_awlen_next = 8'(axi_cmd_output_cycle_count_reg);
m_axi_awvalid_next = 1'b1;
m_axi_rready_next = m_axi_wready_int;
axi_state_next = AXI_STATE_WRITE;
end
end
AXI_STATE_WRITE: begin
// handle AXI read data
// accept read data only while input beats remain and the output path has room
m_axi_rready_next = m_axi_wready_int && input_active_reg;
// advance on every accepted read beat, or every cycle once all input beats are in
if ((m_axi_rd.rready && m_axi_rd.rvalid) || !input_active_reg) begin
// transfer in AXI read data
transfer_in_save = m_axi_rd.rready && m_axi_rd.rvalid;
if (UNALIGNED_EN && first_input_cycle_reg && bubble_cycle_reg) begin
// bubble cycle: consume the first read beat without producing a write beat
if (input_active_reg) begin
input_cycle_count_next = input_cycle_count_reg - 1;
input_active_next = input_cycle_count_reg > 0;
end
bubble_cycle_next = 1'b0;
first_input_cycle_next = 1'b0;
m_axi_rready_next = m_axi_wready_int && input_active_next;
axi_state_next = AXI_STATE_WRITE;
end else begin
// update counters
if (input_active_reg) begin
input_cycle_count_next = input_cycle_count_reg - 1;
input_active_next = input_cycle_count_reg > 0;
end
if (output_active_reg) begin
output_cycle_count_next = output_cycle_count_reg - 1;
output_active_next = output_cycle_count_reg > 0;
end
output_last_cycle_next = output_cycle_count_next == 0;
bubble_cycle_next = 1'b0;
first_input_cycle_next = 1'b0;
first_output_cycle_next = 1'b0;
// pass through read data
m_axi_wdata_int = shift_axi_rdata;
// first beat: mask off byte lanes below the starting offset
if (first_output_cycle_reg) begin
m_axi_wstrb_int = {AXI_STRB_W{1'b1}} << first_cycle_offset_reg;
end else begin
m_axi_wstrb_int = {AXI_STRB_W{1'b1}};
end
m_axi_wvalid_int = 1'b1;
if (output_last_cycle_reg) begin
// no more data to transfer, finish operation
// last beat: mask off byte lanes at and above the ending offset
// (an ending offset of zero leaves the strobe unchanged)
if (last_cycle_offset_reg > 0) begin
m_axi_wstrb_int = m_axi_wstrb_int & {AXI_STRB_W{1'b1}} >> (OFFSET_W'(AXI_STRB_W) - OFFSET_W'(last_cycle_offset_reg));
end
m_axi_wlast_int = 1'b1;
// record this burst so its write response can be matched to the tag
status_fifo_we = 1'b1;
status_fifo_wr_tag = tag_reg;
status_fifo_wr_resp = rresp_next;
status_fifo_wr_last = last_transfer_reg;
// clear the sticky read error only after the descriptor's final burst
if (last_transfer_reg) begin
rresp_next = AXI_RESP_OKAY;
end
m_axi_rready_next = 1'b0;
axi_state_next = AXI_STATE_IDLE;
end else begin
// more cycles in AXI transfer
axi_state_next = AXI_STATE_WRITE;
end
end
end else begin
axi_state_next = AXI_STATE_WRITE;
end
end
endcase
// write response handling, independent of the FSM state above
if (status_fifo_rd_ptr_reg != status_fifo_wr_ptr_reg) begin
// status FIFO not empty
if (m_axi_wr.bready && m_axi_wr.bvalid) begin
// got write completion, pop and return status
desc_sts_tag_next = status_fifo_tag[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
// read errors recorded for the burst take priority over write errors
if (status_fifo_resp[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]] == AXI_RESP_SLVERR) begin
desc_sts_error_next = DMA_ERROR_AXI_RD_SLVERR;
end else if (status_fifo_resp[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]] == AXI_RESP_DECERR) begin
desc_sts_error_next = DMA_ERROR_AXI_RD_DECERR;
end else if (bresp_next == AXI_RESP_SLVERR) begin
desc_sts_error_next = DMA_ERROR_AXI_WR_SLVERR;
end else if (bresp_next == AXI_RESP_DECERR) begin
desc_sts_error_next = DMA_ERROR_AXI_WR_DECERR;
end else begin
desc_sts_error_next = DMA_ERROR_NONE;
end
// status is only reported for the entry flagged as the descriptor's last burst
desc_sts_valid_next = status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]];
status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg + 1;
m_axi_bready_next = 1'b0;
// clear the sticky write error once the descriptor's last burst completes
if (status_fifo_last[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]) begin
bresp_next = AXI_RESP_OKAY;
end
// one fewer write burst outstanding
dec_active = 1'b1;
end else begin
// wait for write completion
m_axi_bready_next = 1'b1;
end
end
end
// Register update: load every *_reg from its *_next value, write the status
// FIFO, maintain the outstanding-burst counter, and apply synchronous reset.
always_ff @(posedge clk) begin
read_state_reg <= read_state_next;
axi_state_reg <= axi_state_next;
desc_req_ready_reg <= desc_req_ready_next;
desc_sts_tag_reg <= desc_sts_tag_next;
desc_sts_error_reg <= desc_sts_error_next;
desc_sts_valid_reg <= desc_sts_valid_next;
m_axi_awaddr_reg <= m_axi_awaddr_next;
m_axi_awlen_reg <= m_axi_awlen_next;
m_axi_awvalid_reg <= m_axi_awvalid_next;
m_axi_bready_reg <= m_axi_bready_next;
m_axi_araddr_reg <= m_axi_araddr_next;
m_axi_arlen_reg <= m_axi_arlen_next;
m_axi_arvalid_reg <= m_axi_arvalid_next;
m_axi_rready_reg <= m_axi_rready_next;
read_addr_reg <= read_addr_next;
write_addr_reg <= write_addr_next;
op_count_reg <= op_count_next;
tr_count_reg <= tr_count_next;
axi_count_reg <= axi_count_next;
axi_cmd_addr_reg <= axi_cmd_addr_next;
axi_cmd_offset_reg <= axi_cmd_offset_next;
axi_cmd_first_cycle_offset_reg <= axi_cmd_first_cycle_offset_next;
axi_cmd_last_cycle_offset_reg <= axi_cmd_last_cycle_offset_next;
axi_cmd_input_cycle_count_reg <= axi_cmd_input_cycle_count_next;
axi_cmd_output_cycle_count_reg <= axi_cmd_output_cycle_count_next;
axi_cmd_bubble_cycle_reg <= axi_cmd_bubble_cycle_next;
axi_cmd_last_transfer_reg <= axi_cmd_last_transfer_next;
axi_cmd_tag_reg <= axi_cmd_tag_next;
axi_cmd_valid_reg <= axi_cmd_valid_next;
offset_reg <= offset_next;
first_cycle_offset_reg <= first_cycle_offset_next;
last_cycle_offset_reg <= last_cycle_offset_next;
input_cycle_count_reg <= input_cycle_count_next;
output_cycle_count_reg <= output_cycle_count_next;
input_active_reg <= input_active_next;
output_active_reg <= output_active_next;
bubble_cycle_reg <= bubble_cycle_next;
first_input_cycle_reg <= first_input_cycle_next;
first_output_cycle_reg <= first_output_cycle_next;
output_last_cycle_reg <= output_last_cycle_next;
last_transfer_reg <= last_transfer_next;
rresp_reg <= rresp_next;
bresp_reg <= bresp_next;
tag_reg <= tag_next;
// keep the accepted read beat for assembling the next shifted write word
if (transfer_in_save) begin
save_axi_rdata_reg <= m_axi_rd.rdata;
end
// push one status entry per completed write burst
if (status_fifo_we) begin
status_fifo_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_tag;
status_fifo_resp[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_resp;
status_fifo_last[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_last;
status_fifo_wr_ptr_reg <= status_fifo_wr_ptr_reg + 1;
end
status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next;
// outstanding-burst counter: +1 when a command is posted (inc_active), -1 when
// a write response is consumed (dec_active), unchanged when both or neither occur.
// active_count_av_reg is the registered "count below 2**STATUS_FIFO_AW" flag.
if (active_count_reg < 2**STATUS_FIFO_AW && inc_active && !dec_active) begin
active_count_reg <= active_count_reg + 1;
active_count_av_reg <= active_count_reg < (2**STATUS_FIFO_AW-1);
end else if (active_count_reg > 0 && !inc_active && dec_active) begin
active_count_reg <= active_count_reg - 1;
active_count_av_reg <= 1'b1;
end else begin
active_count_av_reg <= active_count_reg < 2**STATUS_FIFO_AW;
end
// synchronous reset; placed last so these assignments override the ones above.
// Only control state is reset; address, count and data registers are left as-is.
if (rst) begin
read_state_reg <= READ_STATE_IDLE;
axi_state_reg <= AXI_STATE_IDLE;
desc_req_ready_reg <= 1'b0;
desc_sts_valid_reg <= 1'b0;
m_axi_awvalid_reg <= 1'b0;
m_axi_bready_reg <= 1'b0;
m_axi_arvalid_reg <= 1'b0;
m_axi_rready_reg <= 1'b0;
axi_cmd_valid_reg <= 1'b0;
rresp_reg <= AXI_RESP_OKAY;
bresp_reg <= AXI_RESP_OKAY;
status_fifo_wr_ptr_reg <= '0;
status_fifo_rd_ptr_reg <= '0;
active_count_reg <= '0;
active_count_av_reg <= 1'b1;
end
end
// output datapath logic
// Write beats produced by the write-side FSM (m_axi_w*_int) are buffered in a
// small FIFO and then moved into an output register that drives the AXI W channel.
logic [AXI_DATA_W-1:0] m_axi_wdata_reg = '0;
logic [AXI_STRB_W-1:0] m_axi_wstrb_reg = '0;
logic m_axi_wlast_reg = 1'b0;
logic m_axi_wvalid_reg = 1'b0;
// FIFO pointers carry one extra bit so full and empty can be told apart
logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = '0;
logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = '0;
logic out_fifo_half_full_reg = 1'b0;
// full: pointers equal except for the extra top bit; empty: pointers identical
wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}});
wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg;
(* ram_style = "distributed" *)
logic [AXI_DATA_W-1:0] out_fifo_wdata[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed" *)
logic [AXI_STRB_W-1:0] out_fifo_wstrb[2**OUTPUT_FIFO_AW];
(* ram_style = "distributed" *)
logic out_fifo_wlast[2**OUTPUT_FIFO_AW];
// backpressure to the write-side FSM is based on the registered half-full flag,
// not on full, so beats produced after the flag rises still find space
assign m_axi_wready_int = !out_fifo_half_full_reg;
assign m_axi_wr.wdata = m_axi_wdata_reg;
assign m_axi_wr.wstrb = m_axi_wstrb_reg;
assign m_axi_wr.wvalid = m_axi_wvalid_reg;
assign m_axi_wr.wlast = m_axi_wlast_reg;
always_ff @(posedge clk) begin
// default: drop wvalid once the beat is accepted (overridden below on a new load)
m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wr.wready;
// half-full when the occupancy reaches half of the FIFO depth
out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_AW-1);
// push a beat from the internal datapath; a beat arriving while full is not stored
if (!out_fifo_full && m_axi_wvalid_int) begin
out_fifo_wdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wdata_int;
out_fifo_wstrb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wstrb_int;
out_fifo_wlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wlast_int;
out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1;
end
// pop into the output register when it is empty or its beat is being accepted
if (!out_fifo_empty && (!m_axi_wvalid_reg || m_axi_wr.wready)) begin
m_axi_wdata_reg <= out_fifo_wdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
m_axi_wstrb_reg <= out_fifo_wstrb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
m_axi_wlast_reg <= out_fifo_wlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]];
m_axi_wvalid_reg <= 1'b1;
out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1;
end
// synchronous reset, last so it overrides the assignments above
if (rst) begin
out_fifo_wr_ptr_reg <= '0;
out_fifo_rd_ptr_reg <= '0;
m_axi_wvalid_reg <= 1'b0;
end
end
endmodule
`resetall

View File

@@ -0,0 +1,59 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb simulation settings; SIM and WAVES may be overridden from the command line
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
# source locations, relative to this directory
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src
# device under test; the test module and HDL toplevel are both named test_$(DUT)
DUT = taxi_axi_cdma
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# same values under the MODULE/TOPLEVEL names
# NOTE(review): presumably kept for cocotb versions that read these names - confirm
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv
VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_desc_if.sv
VERILOG_SOURCES += $(TAXI_SRC_DIR)/axi/rtl/taxi_axi_if.sv
# handle file list files
# process_f_file:  read one .f list, prefix each entry with the list's directory, expand recursively
# process_f_files: replace every .f entry in a list by its expanded contents
# uniq_base:       remove entries with duplicate base names, keeping the last occurrence of each
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
# (exported as PARAM_* so the loops below can turn each into a simulator parameter override)
export PARAM_AXI_DATA_W := 32
export PARAM_AXI_ADDR_W := 16
export PARAM_AXI_STRB_W := $(shell expr $(PARAM_AXI_DATA_W) / 8 )
export PARAM_AXI_ID_W := 8
export PARAM_AXI_MAX_BURST_LEN := 16
export PARAM_LEN_W := 20
export PARAM_TAG_W := 8
export PARAM_UNALIGNED_EN := 1
# pass every PARAM_* variable to the simulator using its own override syntax
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
# waveform tracing only when WAVES=1
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
# pull in the cocotb simulator makefiles
include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge
from cocotb.regression import TestFactory
from cocotbext.axi import AxiBus, AxiRam
from cocotbext.axi.stream import define_stream
# Stream helper classes for the descriptor request bus (signals prefixed "req_").
# src/dst address, length, tag and the valid/ready pair are required signals;
# id/dest/user are optional.
DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc",
signals=["req_src_addr", "req_dst_addr", "req_len", "req_tag", "req_valid", "req_ready"],
optional_signals=["req_id", "req_dest", "req_user"]
)
# Stream helper classes for the descriptor status bus (signals prefixed "sts_").
# No ready signal is listed, so only tag, error and valid are required.
DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus",
signals=["sts_tag", "sts_error", "sts_valid"],
optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"]
)
class TB:
    """Testbench harness for the AXI CDMA device under test.

    Starts a 10 ns clock on ``dut.clk``, attaches a descriptor source and a
    descriptor status sink to ``dut.dma_desc``, and backs ``dut.m_axi`` with a
    64 KiB AXI RAM model. ``dut.enable`` is initialised to 0.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.DEBUG)

        clock = Clock(dut.clk, 10, units="ns")
        cocotb.start_soon(clock.start())

        # control interface
        request_bus = DescBus.from_entity(dut.dma_desc)
        self.desc_source = DescSource(request_bus, dut.clk, dut.rst)
        status_bus = DescStatusBus.from_entity(dut.dma_desc)
        self.desc_status_sink = DescStatusSink(status_bus, dut.clk, dut.rst)

        # AXI interface
        axi_bus = AxiBus.from_entity(dut.m_axi)
        self.axi_ram = AxiRam(axi_bus, dut.clk, dut.rst, size=2**16)

        dut.enable.setimmediatevalue(0)

    def set_idle_generator(self, generator=None):
        """Install a fresh pause generator on every channel the testbench drives.

        Covers the descriptor source and the RAM model's B and R channels.
        Does nothing when ``generator`` is falsy.
        """
        if not generator:
            return
        driven = (
            self.desc_source,
            self.axi_ram.write_if.b_channel,
            self.axi_ram.read_if.r_channel,
        )
        for endpoint in driven:
            endpoint.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install a fresh pause generator on every channel the testbench receives.

        Covers the RAM model's AW, W and AR channels.
        Does nothing when ``generator`` is falsy.
        """
        if not generator:
            return
        received = (
            self.axi_ram.write_if.aw_channel,
            self.axi_ram.write_if.w_channel,
            self.axi_ram.read_if.ar_channel,
        )
        for endpoint in received:
            endpoint.set_pause_generator(generator())

    async def cycle_reset(self):
        """Pulse ``rst`` high for two clock cycles, with two idle cycles on each side."""
        clk = self.dut.clk
        rst = self.dut.rst

        rst.setimmediatevalue(0)
        # wait two edges, assert reset; wait two edges, release reset
        for level in (1, 0):
            await RisingEdge(clk)
            await RisingEdge(clk)
            rst.value = level
        await RisingEdge(clk)
        await RisingEdge(clk)
async def run_test(dut, data_in=None, idle_inserter=None, backpressure_inserter=None):
    """Copy blocks of various lengths and alignments through the DMA and check the result.

    For every combination of length, read offset and write offset, the source
    region is filled with a counting pattern and the destination region with
    0xaa. One descriptor is issued, its status is checked for the expected tag
    and a zero error code, and the destination is compared including 8 guard
    bytes on each side. Offsets cover the area just above a base address and
    just below the following 4096-byte boundary.
    """
    tb = TB(dut)

    byte_lanes = tb.axi_ram.write_if.byte_lanes
    # with unaligned support disabled, only word-aligned offsets are exercised
    step_size = 1 if int(dut.UNALIGNED_EN.value) else byte_lanes
    tag_count = 2**len(tb.desc_source.bus.req_tag)

    cur_tag = 1

    await tb.cycle_reset()

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    dut.enable.value = 1

    lengths = list(range(1, byte_lanes*4+1))+[128]
    offsets = list(range(8, 8+byte_lanes*2, step_size))+list(range(4096-byte_lanes*2, 4096, step_size))

    # same iteration order as three nested loops: length, read offset, write offset
    for length, read_offset, write_offset in itertools.product(lengths, offsets, offsets):
        tb.log.info("length %d, read_offset %d, write_offset %d", length, read_offset, write_offset)

        read_addr = read_offset+0x1000
        write_addr = 0x00008000+write_offset+0x1000
        test_data = bytearray(x % 256 for x in range(length))

        tb.axi_ram.write(read_addr, test_data)
        # pre-fill the destination area so stray writes show up in the guard bytes
        tb.axi_ram.write(write_addr & 0xffff80, b'\xaa'*(len(test_data)+256))

        desc = DescTransaction(req_src_addr=read_addr, req_dst_addr=write_addr, req_len=len(test_data), req_tag=cur_tag)
        await tb.desc_source.send(desc)

        status = await tb.desc_status_sink.recv()

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) == 0

        tb.log.debug("%s", tb.axi_ram.hexdump_str((write_addr & ~0xf)-16, (((write_addr & 0xf)+length-1) & ~0xf)+48))

        expected = b'\xaa'*8+test_data+b'\xaa'*8
        assert tb.axi_ram.read(write_addr-8, len(test_data)+16) == expected

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless iterator repeating the pause pattern 1, 1, 1, 0."""
    pattern = [1, 1, 1, 0]
    return itertools.cycle(pattern)
# Register the cocotb tests only when cocotb.top is set.
# NOTE(review): presumably this is only the case inside a running simulation,
# so importing this file from pytest skips the block - confirm.
if getattr(cocotb, 'top', None) is not None:
for test in [run_test]:
factory = TestFactory(test)
# each option takes two values: disabled (None) or the cycle_pause pattern
factory.add_option("idle_inserter", [None, cycle_pause])
factory.add_option("backpressure_inserter", [None, cycle_pause])
factory.generate_tests()
# cocotb-test
# Absolute paths used by the pytest entry point below, derived from this
# file's directory: rtl and lib sit two levels up, taxi sources under lib/taxi/src.
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
def process_f_files(files):
    """Expand ``.f`` file lists and de-duplicate the result by base name.

    Each entry ending in ``.f`` (case-insensitive) is read as a
    whitespace-separated list of paths relative to the list file's own
    directory, and expanded recursively. All other entries are kept as given.
    When several paths share a base name, the one seen last wins but keeps the
    position of the first occurrence.

    Returns the resulting paths as a list.
    """
    by_basename = {}
    for entry in files:
        if entry[-2:].lower() != '.f':
            by_basename[os.path.basename(entry)] = entry
            continue

        list_dir = os.path.dirname(entry)
        with open(entry, 'r') as list_file:
            names = list_file.read().split()

        expanded = process_f_files([os.path.join(list_dir, name) for name in names])
        by_basename.update((os.path.basename(path), path) for path in expanded)

    return list(by_basename.values())
@pytest.mark.parametrize("unaligned", [0, 1])
@pytest.mark.parametrize("axi_data_w", [8, 16, 32])
def test_taxi_axi_cdma(request, axi_data_w, unaligned):
    """Build and run the CDMA testbench under Verilator for one parameter set.

    Parametrized over the AXI data width and over whether unaligned transfers
    are enabled. Each combination gets its own build directory under
    ``sim_build``, named after the pytest node.
    """
    dut = "taxi_axi_cdma"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    source_parts = [
        (tests_dir, f"{toplevel}.sv"),
        (rtl_dir, f"{dut}.sv"),
        (rtl_dir, "taxi_dma_desc_if.sv"),
        (taxi_src_dir, "axi", "rtl", "taxi_axi_if.sv"),
    ]
    verilog_sources = process_f_files([os.path.join(*parts) for parts in source_parts])

    parameters = {
        'AXI_DATA_W': axi_data_w,
        'AXI_ADDR_W': 16,
        'AXI_STRB_W': axi_data_w // 8,
        'AXI_ID_W': 8,
        'AXI_MAX_BURST_LEN': 16,
        'LEN_W': 20,
        'TAG_W': 8,
        'UNALIGNED_EN': unaligned,
    }

    # mirror every parameter into the environment as PARAM_<name>
    extra_env = {}
    for key, value in parameters.items():
        extra_env[f'PARAM_{key}'] = str(value)

    build_name = request.node.name.replace('[', '-').replace(']', '')
    sim_build = os.path.join(tests_dir, "sim_build", build_name)

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,94 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4 Central DMA testbench
*/
// Test wrapper: instantiates the descriptor and AXI interfaces and connects
// them to a single taxi_axi_cdma instance. It has no ports; clk, rst and
// enable are plain internal signals that nothing in this module assigns.
module test_taxi_axi_cdma #
(
/* verilator lint_off WIDTHTRUNC */
// AXI bus geometry
parameter AXI_DATA_W = 32,
parameter AXI_ADDR_W = 16,
parameter AXI_STRB_W = AXI_DATA_W / 8,
parameter AXI_ID_W = 8,
parameter AXI_MAX_BURST_LEN = 16,
// descriptor length and tag field widths
parameter LEN_W = 20,
parameter TAG_W = 8,
parameter logic UNALIGNED_EN = 1'b1
/* verilator lint_on WIDTHTRUNC */
)
();
logic clk;
logic rst;
// descriptor interface; source and destination addresses are both AXI_ADDR_W wide
// and all *_EN option parameters are turned off
taxi_dma_desc_if #(
.SRC_ADDR_W(AXI_ADDR_W),
.SRC_SEL_EN(1'b0),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(AXI_ADDR_W),
.DST_SEL_EN(1'b0),
.DST_ASID_EN(1'b0),
.IMM_EN(1'b0),
.LEN_W(LEN_W),
.TAG_W(TAG_W),
.ID_EN(1'b0),
.DEST_EN(1'b0),
.USER_EN(1'b0)
) dma_desc();
// AXI interface with all user signals disabled
taxi_axi_if #(
.DATA_W(AXI_DATA_W),
.ADDR_W(AXI_ADDR_W),
.STRB_W(AXI_STRB_W),
.ID_W(AXI_ID_W),
.AWUSER_EN(1'b0),
.WUSER_EN(1'b0),
.BUSER_EN(1'b0),
.ARUSER_EN(1'b0),
.RUSER_EN(1'b0),
.MAX_BURST_LEN(AXI_MAX_BURST_LEN)
) m_axi();
logic enable;
// unit under test; a single interface instance is bound to both ports of each
// kind (request/status on dma_desc, write/read on m_axi)
taxi_axi_cdma #(
.AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN),
.UNALIGNED_EN(UNALIGNED_EN)
)
uut (
.clk(clk),
.rst(rst),
/*
* DMA descriptor
*/
.desc_req(dma_desc),
.desc_sts(dma_desc),
/*
* AXI4 master interface
*/
.m_axi_wr(m_axi),
.m_axi_rd(m_axi),
/*
* Configuration
*/
.enable(enable)
);
endmodule
`resetall