dma: Add UltraScale PCIe DMA interface module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-11-04 17:18:26 -08:00
parent 14d988d1f2
commit b0dd91aa8d
17 changed files with 5832 additions and 0 deletions

View File

@@ -67,6 +67,7 @@ To facilitate the dual-license model, contributions to the project can only be a
* AXI streaming DMA
* DMA client for AXI stream
* DMA interface for AXI
* DMA interface for UltraScale PCIe
* Segmented SDP RAM
* Segmented dual-clock SDP RAM
* Ethernet

View File

@@ -0,0 +1,6 @@
taxi_dma_if_pcie_us.sv
taxi_dma_if_pcie_us_rd.sv
taxi_dma_if_pcie_us_wr.sv
taxi_dma_desc_if.sv
taxi_dma_ram_if.sv
../lib/taxi/src/axis/rtl/taxi_axis_if.sv

View File

@@ -0,0 +1,307 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2019-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * UltraScale PCIe DMA interface
 *
 * Structural wrapper that pairs the read engine (taxi_dma_if_pcie_us_rd) with
 * the write engine (taxi_dma_if_pcie_us_wr).  This module contains no logic of
 * its own; it only distributes parameters and wires ports:
 *
 *  - The read engine drives its requester requests onto the internal stream
 *    axis_rq_int, which feeds the write engine's s_axis_rq port.  The write
 *    engine is the only driver of the external m_axis_rq stream.
 *    NOTE(review): presumably the write engine merges the forwarded read
 *    requests with its own write requests; that logic is not in this file.
 *  - Requester completions (s_axis_rc) go to the read engine only.
 *  - Both engines observe the same RQ sequence number inputs.
 *  - stat_err_cor / stat_err_uncor are driven by the read engine only.
 */
module taxi_dma_if_pcie_us #
(
// RQ sequence number width (forwarded to both engines)
parameter RQ_SEQ_NUM_W = 6,
// RQ sequence number tracking enable (forwarded to both engines)
parameter logic RQ_SEQ_NUM_EN = 1'b0,
// PCIe tag count (read engine only)
parameter PCIE_TAG_CNT = 64,
// Operation table size (read); defaults to one entry per PCIe tag
parameter RD_OP_TBL_SIZE = PCIE_TAG_CNT,
// In-flight transmit limit (read); defaults to half the sequence number space
parameter RD_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1),
// Transmit flow control (read)
parameter logic RD_TX_FC_EN = 1'b0,
// Completion header flow control credit limit (read)
parameter RD_CPLH_FC_LIMIT = 0,
// Completion data flow control credit limit (read); defaults to 4x the header limit
parameter RD_CPLD_FC_LIMIT = RD_CPLH_FC_LIMIT*4,
// Operation table size (write); defaults to half the sequence number space
parameter WR_OP_TBL_SIZE = 2**(RQ_SEQ_NUM_W-1),
// In-flight transmit limit (write); defaults to half the sequence number space
parameter WR_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1),
// Transmit flow control (write)
parameter logic WR_TX_FC_EN = 1'b0
)
(
input wire logic clk,
input wire logic rst,
/*
* UltraScale PCIe interface
* m_axis_rq: requester request output (driven by the write engine)
* s_axis_rc: requester completion input (consumed by the read engine)
*/
taxi_axis_if.src m_axis_rq,
taxi_axis_if.snk s_axis_rc,
/*
* Transmit sequence number input
* (two channels, each connected to both engines)
*/
input wire logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_0,
input wire logic s_axis_rq_seq_num_valid_0,
input wire logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_1,
input wire logic s_axis_rq_seq_num_valid_1,
/*
* Transmit flow control
* nph_av is connected to the read engine; ph_av and pd_av to the write engine
*/
input wire logic [7:0] pcie_tx_fc_nph_av,
input wire logic [7:0] pcie_tx_fc_ph_av,
input wire logic [11:0] pcie_tx_fc_pd_av,
/*
* Read descriptor
* (request in, status out; connected to the read engine)
*/
taxi_dma_desc_if.req_snk rd_desc_req,
taxi_dma_desc_if.sts_src rd_desc_sts,
/*
* Write descriptor
* (request in, status out; connected to the write engine)
*/
taxi_dma_desc_if.req_snk wr_desc_req,
taxi_dma_desc_if.sts_src wr_desc_sts,
/*
* RAM interface
* dma_ram_wr is mastered by the read engine, dma_ram_rd by the write engine
*/
taxi_dma_ram_if.wr_mst dma_ram_wr,
taxi_dma_ram_if.rd_mst dma_ram_rd,
/*
* Configuration
* read_enable, ext_tag_en, rcb_128b and max_rd_req_size go to the read engine;
* write_enable and max_payload_size go to the write engine;
* requester_id and requester_id_en go to both
*/
input wire logic read_enable,
input wire logic write_enable,
input wire logic ext_tag_en,
input wire logic rcb_128b,
input wire logic [15:0] requester_id,
input wire logic requester_id_en,
input wire logic [2:0] max_rd_req_size,
input wire logic [2:0] max_payload_size,
/*
* Status
* (the error outputs are driven by the read engine only)
*/
output wire logic stat_rd_busy,
output wire logic stat_wr_busy,
output wire logic stat_err_cor,
output wire logic stat_err_uncor,
/*
* Statistics
* stat_rd_* come from the read engine, stat_wr_* from the write engine
*/
output wire logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_start_tag,
output wire logic stat_rd_op_start_valid,
output wire logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag,
output wire logic [3:0] stat_rd_op_finish_status,
output wire logic stat_rd_op_finish_valid,
output wire logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_start_tag,
output wire logic [12:0] stat_rd_req_start_len,
output wire logic stat_rd_req_start_valid,
output wire logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_finish_tag,
output wire logic [3:0] stat_rd_req_finish_status,
output wire logic stat_rd_req_finish_valid,
output wire logic stat_rd_req_timeout,
output wire logic stat_rd_op_tbl_full,
output wire logic stat_rd_no_tags,
output wire logic stat_rd_tx_limit,
output wire logic stat_rd_tx_stall,
output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_start_tag,
output wire logic stat_wr_op_start_valid,
output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag,
output wire logic [3:0] stat_wr_op_finish_status,
output wire logic stat_wr_op_finish_valid,
output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_start_tag,
output wire logic [12:0] stat_wr_req_start_len,
output wire logic stat_wr_req_start_valid,
output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag,
output wire logic [3:0] stat_wr_req_finish_status,
output wire logic stat_wr_req_finish_valid,
output wire logic stat_wr_op_tbl_full,
output wire logic stat_wr_tx_limit,
output wire logic stat_wr_tx_stall
);
// Internal RQ stream carrying the read engine's requests to the write engine.
// Data, keep and user widths are copied from the external m_axis_rq interface.
taxi_axis_if #(
.DATA_W(m_axis_rq.DATA_W),
.KEEP_EN(1),
.KEEP_W(m_axis_rq.KEEP_W),
.USER_EN(1),
.USER_W(m_axis_rq.USER_W)
) axis_rq_int();
// Read engine: takes read descriptors, issues requests on axis_rq_int,
// receives completions on s_axis_rc and masters the RAM write port.
taxi_dma_if_pcie_us_rd #(
.RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
.RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN),
.PCIE_TAG_CNT(PCIE_TAG_CNT),
.OP_TBL_SIZE(RD_OP_TBL_SIZE),
.TX_LIMIT(RD_TX_LIMIT),
.TX_FC_EN(RD_TX_FC_EN),
.CPLH_FC_LIMIT(RD_CPLH_FC_LIMIT),
.CPLD_FC_LIMIT(RD_CPLD_FC_LIMIT)
)
rd_inst (
.clk(clk),
.rst(rst),
/*
* UltraScale PCIe interface
* (requests leave via the internal stream, not the external m_axis_rq)
*/
.m_axis_rq(axis_rq_int),
.s_axis_rc(s_axis_rc),
/*
* Transmit sequence number input
*/
.s_axis_rq_seq_num_0(s_axis_rq_seq_num_0),
.s_axis_rq_seq_num_valid_0(s_axis_rq_seq_num_valid_0),
.s_axis_rq_seq_num_1(s_axis_rq_seq_num_1),
.s_axis_rq_seq_num_valid_1(s_axis_rq_seq_num_valid_1),
/*
* Transmit flow control
*/
.pcie_tx_fc_nph_av(pcie_tx_fc_nph_av),
/*
* Read descriptor
*/
.rd_desc_req(rd_desc_req),
.rd_desc_sts(rd_desc_sts),
/*
* RAM interface
*/
.dma_ram_wr(dma_ram_wr),
/*
* Configuration
*/
.enable(read_enable),
.ext_tag_en(ext_tag_en),
.rcb_128b(rcb_128b),
.requester_id(requester_id),
.requester_id_en(requester_id_en),
.max_rd_req_size(max_rd_req_size),
/*
* Status
*/
.stat_busy(stat_rd_busy),
.stat_err_cor(stat_err_cor),
.stat_err_uncor(stat_err_uncor),
/*
* Statistics
*/
.stat_rd_op_start_tag(stat_rd_op_start_tag),
.stat_rd_op_start_valid(stat_rd_op_start_valid),
.stat_rd_op_finish_tag(stat_rd_op_finish_tag),
.stat_rd_op_finish_status(stat_rd_op_finish_status),
.stat_rd_op_finish_valid(stat_rd_op_finish_valid),
.stat_rd_req_start_tag(stat_rd_req_start_tag),
.stat_rd_req_start_len(stat_rd_req_start_len),
.stat_rd_req_start_valid(stat_rd_req_start_valid),
.stat_rd_req_finish_tag(stat_rd_req_finish_tag),
.stat_rd_req_finish_status(stat_rd_req_finish_status),
.stat_rd_req_finish_valid(stat_rd_req_finish_valid),
.stat_rd_req_timeout(stat_rd_req_timeout),
.stat_rd_op_tbl_full(stat_rd_op_tbl_full),
.stat_rd_no_tags(stat_rd_no_tags),
.stat_rd_tx_limit(stat_rd_tx_limit),
.stat_rd_tx_stall(stat_rd_tx_stall)
);
// Write engine: takes write descriptors, masters the RAM read port, accepts
// the read engine's requests on s_axis_rq and drives the external m_axis_rq.
taxi_dma_if_pcie_us_wr #(
.RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
.RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN),
.OP_TBL_SIZE(WR_OP_TBL_SIZE),
.TX_LIMIT(WR_TX_LIMIT),
.TX_FC_EN(WR_TX_FC_EN)
)
wr_inst (
.clk(clk),
.rst(rst),
/*
* UltraScale PCIe interface
* (s_axis_rq receives the read engine's requests)
*/
.s_axis_rq(axis_rq_int),
.m_axis_rq(m_axis_rq),
/*
* Transmit sequence number input
*/
.s_axis_rq_seq_num_0(s_axis_rq_seq_num_0),
.s_axis_rq_seq_num_valid_0(s_axis_rq_seq_num_valid_0),
.s_axis_rq_seq_num_1(s_axis_rq_seq_num_1),
.s_axis_rq_seq_num_valid_1(s_axis_rq_seq_num_valid_1),
/*
* Transmit flow control
*/
.pcie_tx_fc_ph_av(pcie_tx_fc_ph_av),
.pcie_tx_fc_pd_av(pcie_tx_fc_pd_av),
/*
* Write descriptor
*/
.wr_desc_req(wr_desc_req),
.wr_desc_sts(wr_desc_sts),
/*
* RAM interface
*/
.dma_ram_rd(dma_ram_rd),
/*
* Configuration
*/
.enable(write_enable),
.requester_id(requester_id),
.requester_id_en(requester_id_en),
.max_payload_size(max_payload_size),
/*
* Status
*/
.stat_busy(stat_wr_busy),
/*
* Statistics
*/
.stat_wr_op_start_tag(stat_wr_op_start_tag),
.stat_wr_op_start_valid(stat_wr_op_start_valid),
.stat_wr_op_finish_tag(stat_wr_op_finish_tag),
.stat_wr_op_finish_status(stat_wr_op_finish_status),
.stat_wr_op_finish_valid(stat_wr_op_finish_valid),
.stat_wr_req_start_tag(stat_wr_req_start_tag),
.stat_wr_req_start_len(stat_wr_req_start_len),
.stat_wr_req_start_valid(stat_wr_req_start_valid),
.stat_wr_req_finish_tag(stat_wr_req_finish_tag),
.stat_wr_req_finish_status(stat_wr_req_finish_status),
.stat_wr_req_finish_valid(stat_wr_req_finish_valid),
.stat_wr_op_tbl_full(stat_wr_op_tbl_full),
.stat_wr_tx_limit(stat_wr_tx_limit),
.stat_wr_tx_stall(stat_wr_tx_stall)
);
endmodule
`resetall

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,71 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb simulation settings
TOPLEVEL_LANG = verilog

# simulator selection and waveform dumping (both overridable from the command line)
SIM ?= verilator
WAVES ?= 0

COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps

# source tree locations, relative to this directory
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src

# the HDL top level and the Python test module are both named test_<DUT>
DUT = taxi_dma_if_pcie_us
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# same values under the MODULE/TOPLEVEL variable names
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)

VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).f

# handle file list files
# process_f_file:  read one .f file and expand its entries relative to the
#                  directory containing that .f file
# process_f_files: expand every *.f entry of a list (recursively); other
#                  entries pass through unchanged
# uniq_base:       drop entries with a duplicate base name, keeping the last
#                  occurrence of each
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))

# module parameters
# Every exported PARAM_<NAME> is passed to the simulator as top-level
# parameter <NAME> by the COMPILE_ARGS rules below.
export PARAM_AXIS_PCIE_DATA_W := 64
export PARAM_AXIS_PCIE_KEEP_W := $(shell expr $(PARAM_AXIS_PCIE_DATA_W) / 32 )
export PARAM_AXIS_PCIE_RQ_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),62,137)
export PARAM_AXIS_PCIE_RC_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),75,161)
export PARAM_RQ_SEQ_NUM_W := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_W)),6,4)
export PARAM_RQ_SEQ_NUM_EN := 1
export PARAM_RAM_SEL_W := 2
export PARAM_RAM_ADDR_W := 16
# max(2, AXIS_PCIE_DATA_W*2/128), computed with shell arithmetic like the
# other derived parameters so that no unversioned "python" executable is
# required on the PATH
export PARAM_RAM_SEGS := $(shell echo "$$(( $(PARAM_AXIS_PCIE_DATA_W) * 2 / 128 > 2 ? $(PARAM_AXIS_PCIE_DATA_W) * 2 / 128 : 2 ))" )
export PARAM_PCIE_TAG_CNT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_W)),256,64)
export PARAM_IMM_EN := 1
export PARAM_IMM_W := $(PARAM_AXIS_PCIE_DATA_W)
export PARAM_LEN_W := 20
export PARAM_TAG_W := 8
export PARAM_RD_OP_TBL_SIZE := $(PARAM_PCIE_TAG_CNT)
export PARAM_RD_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_W)-1) ))" )
export PARAM_RD_TX_FC_EN := 1
export PARAM_RD_CPLH_FC_LIMIT := 512
export PARAM_RD_CPLD_FC_LIMIT := $(shell expr $(PARAM_RD_CPLH_FC_LIMIT) \* 4 )
export PARAM_WR_OP_TBL_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_W)-1) ))" )
export PARAM_WR_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_W)-1) ))" )
export PARAM_WR_TX_FC_EN := 1

# translate the PARAM_* variables into simulator-specific parameter overrides
ifeq ($(SIM), icarus)
    PLUSARGS += -fst

    COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
    COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))

    ifeq ($(WAVES), 1)
        COMPILE_ARGS += --trace-fst
        VERILATOR_TRACE = 1
    endif
endif

include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1 @@
../dma_psdp_ram.py

View File

@@ -0,0 +1,469 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import sys
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.triggers import RisingEdge, FallingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiStreamBus
from cocotbext.pcie.core import RootComplex
from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice
from cocotbext.axi.stream import define_stream
from cocotbext.axi.utils import hexdump_str
try:
from dma_psdp_ram import PsdpRam, PsdpRamBus
except ImportError:
# attempt import from current directory
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
try:
from dma_psdp_ram import PsdpRam, PsdpRamBus
finally:
del sys.path[0]
# Descriptor request channel: bus / transaction / source / sink / monitor
# classes generated by define_stream for the req_* signals.
(
    DescBus,
    DescTransaction,
    DescSource,
    DescSink,
    DescMonitor,
) = define_stream(
    "Desc",
    signals=[
        "req_src_addr", "req_src_sel", "req_src_asid",
        "req_dst_addr", "req_dst_sel", "req_dst_asid",
        "req_len", "req_tag",
        "req_valid", "req_ready",
    ],
    optional_signals=[
        "req_imm", "req_imm_en",
        "req_id", "req_dest", "req_user",
    ],
)

# Descriptor status channel: same set of generated classes for the sts_*
# signals (the signal list contains a valid signal but no ready signal).
(
    DescStatusBus,
    DescStatusTransaction,
    DescStatusSource,
    DescStatusSink,
    DescStatusMonitor,
) = define_stream(
    "DescStatus",
    signals=[
        "sts_tag", "sts_error",
        "sts_valid",
    ],
    optional_signals=[
        "sts_len",
        "sts_id", "sts_dest", "sts_user",
    ],
)
class TB:
    """Testbench harness for the UltraScale PCIe DMA interface.

    Creates a PCIe root complex and an UltraScale+ PCIe device model wired to
    the DUT's RQ/RC streams, sequence number inputs and flow control inputs,
    a PSDP RAM model on the DUT's DMA RAM interface, and descriptor sources
    and status sinks for the read and write directions.  Two background
    tasks latch any rising edge on the DUT's error status outputs.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.DEBUG)

        # PCIe
        self.rc = RootComplex()

        # configuration options
        options = dict(
            pcie_generation=3,
            # pcie_link_width=2,
            # user_clk_frequency=250e6,
            alignment="dword",
            cq_straddle=False,
            cc_straddle=False,
            rq_straddle=False,
            rc_straddle=False,
            rc_4tlp_straddle=False,
            pf_count=1,
            max_payload_size=1024,
            enable_client_tag=True,
            enable_extended_tag=True,
            enable_parity=False,
            enable_rx_msg_interface=False,
            enable_sriov=False,
            enable_extended_configuration=False,
        )

        # MSI: enabled with 32 vectors on PF0, disabled (count 1) on PF1-PF3
        for pf in range(4):
            options[f"pf{pf}_msi_enable"] = (pf == 0)
            options[f"pf{pf}_msi_count"] = 32 if pf == 0 else 1

        # MSI-X: disabled on every PF, all table/PBA settings zero
        msix_fields = ("table_size", "table_bir", "table_offset", "pba_bir", "pba_offset")
        for pf in range(4):
            options[f"pf{pf}_msix_enable"] = False
            for field in msix_fields:
                options[f"pf{pf}_msix_{field}"] = 0

        # signals
        signals = dict(
            user_clk=dut.clk,
            user_reset=dut.rst,

            rq_bus=AxiStreamBus.from_entity(dut.m_axis_rq),
            pcie_rq_seq_num0=dut.s_axis_rq_seq_num_0,
            pcie_rq_seq_num_vld0=dut.s_axis_rq_seq_num_valid_0,
            pcie_rq_seq_num1=dut.s_axis_rq_seq_num_1,
            pcie_rq_seq_num_vld1=dut.s_axis_rq_seq_num_valid_1,

            rc_bus=AxiStreamBus.from_entity(dut.s_axis_rc),

            cfg_max_payload=dut.max_payload_size,
            cfg_max_read_req=dut.max_rd_req_size,

            cfg_fc_sel=0b100,
            cfg_fc_ph=dut.pcie_tx_fc_ph_av,
            cfg_fc_pd=dut.pcie_tx_fc_pd_av,
            cfg_fc_nph=dut.pcie_tx_fc_nph_av,
        )

        self.dev = UltraScalePlusPcieDevice(**options, **signals)

        self.dev.log.setLevel(logging.DEBUG)

        self.rc.make_port().connect(self.dev)

        # DMA RAM
        ram_bus = PsdpRamBus.from_entity(dut.dma_ram)
        self.dma_ram = PsdpRam(ram_bus, dut.clk, dut.rst, size=2**16)

        # Control
        self.read_desc_source = DescSource(DescBus.from_entity(dut.rd_desc), dut.clk, dut.rst)
        self.read_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.rd_desc), dut.clk, dut.rst)

        self.write_desc_source = DescSource(DescBus.from_entity(dut.wr_desc), dut.clk, dut.rst)
        self.write_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.wr_desc), dut.clk, dut.rst)

        # all configuration inputs start out at zero
        for cfg_input in (
            dut.requester_id,
            dut.requester_id_en,
            dut.ext_tag_en,
            dut.rcb_128b,
            dut.read_enable,
            dut.write_enable,
        ):
            cfg_input.setimmediatevalue(0)

        # monitor error outputs
        self.stat_err_cor_asserted = False
        self.stat_err_uncor_asserted = False
        cocotb.start_soon(self._run_monitor_stat_err_cor())
        cocotb.start_soon(self._run_monitor_stat_err_uncor())

    def set_idle_generator(self, generator=None):
        """Install a pause generator on the RC source; no-op if none is given."""
        if not generator:
            return
        self.dev.rc_source.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install pause generators on the RQ sink and both RAM interfaces; no-op if none is given."""
        if not generator:
            return
        # each target gets its own generator instance
        self.dev.rq_sink.set_pause_generator(generator())
        self.dma_ram.write_if.set_pause_generator(generator())
        self.dma_ram.read_if.set_pause_generator(generator())

    async def _run_monitor_stat_err_cor(self):
        """Latch stat_err_cor_asserted on every rising edge of stat_err_cor."""
        err_signal = self.dut.stat_err_cor
        while True:
            await RisingEdge(err_signal)
            self.log.info("stat_err_cor (correctable error) was asserted")
            self.stat_err_cor_asserted = True

    async def _run_monitor_stat_err_uncor(self):
        """Latch stat_err_uncor_asserted on every rising edge of stat_err_uncor."""
        err_signal = self.dut.stat_err_uncor
        while True:
            await RisingEdge(err_signal)
            self.log.info("stat_err_uncor (uncorrectable error) was asserted")
            self.stat_err_uncor_asserted = True
async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None):
    """DMA write test: copy data from the DMA RAM model to host memory.

    For each combination of transfer length and host address offset, the
    RAM is loaded with a counting pattern, a write descriptor is issued,
    and the host memory region is compared against the pattern including
    one guard byte on either side.
    """

    tb = TB(dut)

    byte_lanes = tb.dma_ram.write_if.byte_lanes
    tag_count = 2**len(tb.write_desc_source.bus.req_tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await FallingEdge(dut.rst)
    await Timer(100, 'ns')

    await tb.rc.enumerate()

    dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
    await dev.enable_device()
    await dev.set_master()

    mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
    mem_base = mem.get_absolute_address(0)

    tb.dut.write_enable.value = 1

    # short lengths around the RAM width, lengths around 128, and 1024
    lengths = [*range(0, byte_lanes+3), *range(128-4, 128+4), 1024]
    # offsets just after the start and just before the end of a 4096-byte span
    pcie_offsets = [*range(4), *range(4096-4, 4096)]
    ram_offsets = range(1)

    for length, pcie_offset, ram_offset in itertools.product(lengths, pcie_offsets, ram_offsets):
        tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset)

        pcie_addr = pcie_offset+0x1000
        ram_addr = ram_offset+0x1000
        payload = bytearray(x % 256 for x in range(length))
        fill_len = len(payload)+256

        # pre-fill source RAM and destination memory with distinct background bytes
        tb.dma_ram.write(ram_addr & 0xffff80, b'\x55'*fill_len)
        mem[pcie_addr-128:pcie_addr-128+fill_len] = b'\xaa'*fill_len
        tb.dma_ram.write(ram_addr, payload)

        tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM "))

        desc = DescTransaction(
            req_dst_addr=mem_base+pcie_addr,
            req_src_addr=ram_addr,
            req_src_sel=0,
            req_len=len(payload),
            req_tag=cur_tag,
        )
        await tb.write_desc_source.send(desc)

        status = await tb.write_desc_status_sink.recv()

        # extra settling delay after the status arrives, before memory is inspected
        await Timer(100 + (length // byte_lanes), 'ns')

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) == 0

        tb.log.debug("%s", hexdump_str(mem, (pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="PCIe "))

        # payload must be intact with the background byte untouched on both sides
        expected = b'\xaa'+payload+b'\xaa'
        assert mem[pcie_addr-1:pcie_addr+len(payload)+1] == expected

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None):
    """DMA read test: copy data from host memory into the DMA RAM model.

    For each combination of transfer length and host address offset, host
    memory is loaded with a counting pattern, a read descriptor is issued,
    and the RAM contents are compared against the pattern including eight
    guard bytes on either side.
    """

    tb = TB(dut)

    byte_lanes = tb.dma_ram.write_if.byte_lanes
    tag_count = 2**len(tb.read_desc_source.bus.req_tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await FallingEdge(dut.rst)
    await Timer(100, 'ns')

    await tb.rc.enumerate()

    dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
    await dev.enable_device()
    await dev.set_master()

    mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
    mem_base = mem.get_absolute_address(0)

    tb.dut.read_enable.value = 1

    # short lengths around the RAM width, lengths around 128, and 1024
    lengths = [*range(0, byte_lanes+3), *range(128-4, 128+4), 1024]
    # offsets just after the start and just before the end of a 4096-byte span
    pcie_offsets = [*range(4), *range(4096-4, 4096)]
    ram_offsets = range(1)

    for length, pcie_offset, ram_offset in itertools.product(lengths, pcie_offsets, ram_offsets):
        tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset)

        pcie_addr = pcie_offset+0x1000
        ram_addr = ram_offset+0x1000
        payload = bytearray(x % 256 for x in range(length))

        mem[pcie_addr:pcie_addr+len(payload)] = payload

        tb.log.debug("%s", hexdump_str(mem, (pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="PCIe "))

        # background fill around the destination so stray writes are detectable
        tb.dma_ram.write(ram_addr-256, b'\xaa'*(len(payload)+512))

        desc = DescTransaction(
            req_src_addr=mem_base+pcie_addr,
            req_dst_addr=ram_addr,
            req_dst_sel=0,
            req_len=len(payload),
            req_tag=cur_tag,
        )
        await tb.read_desc_source.send(desc)

        status = await tb.read_desc_status_sink.recv()

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) == 0

        tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM "))

        # payload must be intact with the background bytes untouched on both sides
        guard = b'\xaa'*8
        assert tb.dma_ram.read(ram_addr-8, len(payload)+16) == guard+payload+guard

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
async def run_test_read_errors(dut, idle_inserter=None, backpressure_inserter=None):
    """DMA read error test: reads that fall partly or fully outside the region.

    Issues three read descriptors whose source range starts before the
    allocated host memory region or runs past its end, and checks that each
    status carries the matching tag and an error code of 10 or 11.
    """

    tb = TB(dut)

    tag_count = 2**len(tb.read_desc_source.bus.req_tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await FallingEdge(dut.rst)
    await Timer(100, 'ns')

    await tb.rc.enumerate()

    dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
    await dev.enable_device()
    await dev.set_master()

    mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
    mem_base = mem.get_absolute_address(0)

    tb.dut.read_enable.value = 1

    # (log message, source address, length) for each failing read
    cases = (
        ("Test bad DMA read (UR) short", mem_base-512, 8),
        ("Test bad DMA read (UR) first", mem_base-512, 1024),
        ("Test bad DMA read (UR) last", mem_base+16*1024*1024-512, 1024),
    )

    for message, src_addr, req_len in cases:
        tb.log.info(message)

        desc = DescTransaction(
            req_src_addr=src_addr,
            req_dst_addr=0,
            req_dst_sel=0,
            req_len=req_len,
            req_tag=cur_tag,
        )
        await tb.read_desc_source.send(desc)

        status = await tb.read_desc_status_sink.recv()

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) in {10, 11}

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless pause pattern: three 1s followed by one 0, repeated."""
    pattern = (1, 1, 1, 0)
    return itertools.cycle(pattern)
# Register the cocotb tests only when a simulator top level is present
# (cocotb.top is set), so the module can also be imported without one.
if getattr(cocotb, 'top', None) is not None:

    for test in (run_test_write, run_test_read, run_test_read_errors):

        factory = TestFactory(test)
        # run each test once without pauses and once with pauses on both sides
        factory.add_option(
            ("idle_inserter", "backpressure_inserter"),
            [
                (None, None),
                (cycle_pause, cycle_pause),
            ],
        )
        factory.generate_tests()


# cocotb-test

# directory layout, resolved relative to this test file
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, os.pardir, os.pardir, 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, os.pardir, os.pardir, 'lib'))
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
def process_f_files(files):
    """Expand ``.f`` file lists into a flat list of source paths.

    Entries ending in ``.f`` (case-insensitive) are read as
    whitespace-separated lists of paths relative to the list file's own
    directory and expanded recursively; all other entries are kept as given.
    Paths are de-duplicated by base name: the position of the first
    occurrence is kept, the path of the last occurrence wins.
    """
    by_name = {}

    for path in files:
        if path[-2:].lower() != '.f':
            by_name[os.path.basename(path)] = path
            continue

        with open(path, 'r') as list_file:
            entries = list_file.read().split()

        base_dir = os.path.dirname(path)
        nested = [os.path.join(base_dir, entry) for entry in entries]
        for expanded in process_f_files(nested):
            by_name[os.path.basename(expanded)] = expanded

    return list(by_name.values())
@pytest.mark.parametrize("axis_pcie_data_w", [64, 128, 256, 512])
def test_taxi_dma_if_pcie_us(request, axis_pcie_data_w):
    """Build and run the testbench under Verilator for one PCIe data width.

    All other top-level parameters are derived from the data width and
    passed both as HDL parameters and as PARAM_* environment variables.
    """
    dut = "taxi_dma_if_pcie_us"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    verilog_sources = process_f_files([
        os.path.join(tests_dir, f"{toplevel}.sv"),
        os.path.join(rtl_dir, f"{dut}.f"),
    ])

    # sideband widths switch at the 512-bit interface
    narrow = axis_pcie_data_w < 512
    rq_user_w = 62 if narrow else 137
    rc_user_w = 75 if narrow else 161

    # a 60-bit RQ user width selects the smaller sequence number / tag space
    seq_num_w = 4 if rq_user_w == 60 else 6
    tag_cnt = 64 if rq_user_w == 60 else 256

    half_seq_space = 2**(seq_num_w-1)
    cplh_fc_limit = 512

    parameters = {
        'AXIS_PCIE_DATA_W': axis_pcie_data_w,
        'AXIS_PCIE_KEEP_W': axis_pcie_data_w // 32,
        'AXIS_PCIE_RQ_USER_W': rq_user_w,
        'AXIS_PCIE_RC_USER_W': rc_user_w,
        'RQ_SEQ_NUM_W': seq_num_w,
        'RQ_SEQ_NUM_EN': 1,
        'RAM_SEL_W': 2,
        'RAM_ADDR_W': 16,
        'RAM_SEGS': max(2, axis_pcie_data_w*2 // 128),
        'PCIE_TAG_CNT': tag_cnt,
        'IMM_EN': 1,
        'IMM_W': axis_pcie_data_w,
        'LEN_W': 20,
        'TAG_W': 8,
        'RD_OP_TBL_SIZE': tag_cnt,
        'RD_TX_LIMIT': half_seq_space,
        'RD_TX_FC_EN': 1,
        'RD_CPLH_FC_LIMIT': cplh_fc_limit,
        'RD_CPLD_FC_LIMIT': cplh_fc_limit*4,
        'WR_OP_TBL_SIZE': half_seq_space,
        'WR_TX_LIMIT': half_seq_space,
        'WR_TX_FC_EN': 1,
    }

    extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

    # one build directory per parametrized test case
    build_name = request.node.name.replace('[', '-').replace(']', '')
    sim_build = os.path.join(tests_dir, "sim_build", build_name)

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,286 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * UltraScale PCIe DMA interface testbench
 *
 * Wrapper around taxi_dma_if_pcie_us that declares every interface and
 * signal of the unit under test at the top level.  Nothing is driven from
 * within this module; stimulus is expected to come from the external test
 * environment.
 */
module test_taxi_dma_if_pcie_us #
(
    /* verilator lint_off WIDTHTRUNC */
    // PCIe AXI stream data width
    parameter AXIS_PCIE_DATA_W = 64,
    // PCIe AXI stream keep width (one bit per 32-bit word)
    parameter AXIS_PCIE_KEEP_W = AXIS_PCIE_DATA_W / 32,
    // RQ / RC sideband widths (differ for the 512-bit interface)
    parameter AXIS_PCIE_RQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 62 : 137,
    parameter AXIS_PCIE_RC_USER_W = AXIS_PCIE_DATA_W < 512 ? 75 : 161,
    // RQ sequence number width and tracking enable
    parameter RQ_SEQ_NUM_W = AXIS_PCIE_RQ_USER_W == 60 ? 4 : 6,
    parameter logic RQ_SEQ_NUM_EN = 1'b1,
    // DMA RAM geometry
    parameter RAM_SEL_W = 2,
    parameter RAM_ADDR_W = 16,
    // Segment count: max(2, AXIS_PCIE_DATA_W*2/128).  This matches the value
    // computed by the Makefile and the pytest runner for this testbench, so
    // the default configuration is the same as the simulated one.
    parameter RAM_SEGS = AXIS_PCIE_DATA_W*2 / 128 > 2 ? AXIS_PCIE_DATA_W*2 / 128 : 2,
    // PCIe tag count
    parameter PCIE_TAG_CNT = AXIS_PCIE_RQ_USER_W == 60 ? 64 : 256,
    // Descriptor immediate data (write descriptor interface only)
    parameter logic IMM_EN = 1,
    parameter IMM_W = AXIS_PCIE_DATA_W,
    // Descriptor length and tag widths
    parameter LEN_W = 20,
    parameter TAG_W = 8,
    // Read engine settings
    parameter RD_OP_TBL_SIZE = PCIE_TAG_CNT,
    parameter RD_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1),
    parameter logic RD_TX_FC_EN = 1'b1,
    parameter RD_CPLH_FC_LIMIT = 512,
    parameter RD_CPLD_FC_LIMIT = RD_CPLH_FC_LIMIT*4,
    // Write engine settings
    parameter WR_OP_TBL_SIZE = 2**(RQ_SEQ_NUM_W-1),
    parameter WR_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1),
    parameter logic WR_TX_FC_EN = 1'b1
    /* verilator lint_on WIDTHTRUNC */
)
();

    // Derived constants: the RAM is twice as wide as the PCIe stream and is
    // split evenly into RAM_SEGS byte-enabled segments.
    localparam PCIE_ADDR_W = 64;
    localparam RAM_DATA_W = AXIS_PCIE_DATA_W*2;
    localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS;
    localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8;
    localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W);

    logic clk;
    logic rst;

    // Requester request stream (output of the unit under test)
    taxi_axis_if #(
        .DATA_W(AXIS_PCIE_DATA_W),
        .KEEP_EN(1'b1),
        .KEEP_W(AXIS_PCIE_KEEP_W),
        .LAST_EN(1'b1),
        .ID_EN(1'b0),
        .DEST_EN(1'b0),
        .USER_EN(1'b1),
        .USER_W(AXIS_PCIE_RQ_USER_W)
    ) m_axis_rq();

    // Requester completion stream (input of the unit under test)
    taxi_axis_if #(
        .DATA_W(AXIS_PCIE_DATA_W),
        .KEEP_EN(1'b1),
        .KEEP_W(AXIS_PCIE_KEEP_W),
        .LAST_EN(1'b1),
        .ID_EN(1'b0),
        .DEST_EN(1'b0),
        .USER_EN(1'b1),
        .USER_W(AXIS_PCIE_RC_USER_W)
    ) s_axis_rc();

    // Transmit sequence number inputs
    logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_0;
    logic s_axis_rq_seq_num_valid_0;
    logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_1;
    logic s_axis_rq_seq_num_valid_1;

    // Transmit flow control inputs
    logic [7:0] pcie_tx_fc_nph_av;
    logic [7:0] pcie_tx_fc_ph_av;
    logic [11:0] pcie_tx_fc_pd_av;

    // Read descriptor interface: PCIe source address, RAM destination
    // address with select; immediate data disabled
    taxi_dma_desc_if #(
        .SRC_ADDR_W(PCIE_ADDR_W),
        .SRC_SEL_EN(1'b0),
        .SRC_ASID_EN(1'b0),
        .DST_ADDR_W(RAM_ADDR_W),
        .DST_SEL_EN(1'b1),
        .DST_SEL_W(RAM_SEL_W),
        .DST_ASID_EN(1'b0),
        .IMM_EN(1'b0),
        .LEN_W(LEN_W),
        .TAG_W(TAG_W),
        .ID_EN(1'b0),
        .DEST_EN(1'b0),
        .USER_EN(1'b0)
    ) rd_desc();

    // Write descriptor interface: RAM source address with select, PCIe
    // destination address; immediate data controlled by IMM_EN / IMM_W
    taxi_dma_desc_if #(
        .SRC_ADDR_W(RAM_ADDR_W),
        .SRC_SEL_EN(1'b1),
        .SRC_SEL_W(RAM_SEL_W),
        .SRC_ASID_EN(1'b0),
        .DST_ADDR_W(PCIE_ADDR_W),
        .DST_SEL_EN(1'b0),
        .DST_ASID_EN(1'b0),
        .IMM_EN(IMM_EN),
        .IMM_W(IMM_W),
        .LEN_W(LEN_W),
        .TAG_W(TAG_W),
        .ID_EN(1'b0),
        .DEST_EN(1'b0),
        .USER_EN(1'b0)
    ) wr_desc();

    // Segmented DMA RAM interface; the same instance is bound to both the
    // write-master and the read-master port of the unit under test
    taxi_dma_ram_if #(
        .SEGS(RAM_SEGS),
        .SEG_ADDR_W(RAM_SEG_ADDR_W),
        .SEG_DATA_W(RAM_SEG_DATA_W),
        .SEG_BE_W(RAM_SEG_BE_W)
    ) dma_ram();

    // Configuration inputs
    logic read_enable;
    logic write_enable;
    logic ext_tag_en;
    logic rcb_128b;
    logic [15:0] requester_id;
    logic requester_id_en;
    logic [2:0] max_rd_req_size;
    logic [2:0] max_payload_size;

    // Status outputs
    logic stat_rd_busy;
    logic stat_wr_busy;
    logic stat_err_cor;
    logic stat_err_uncor;

    // Statistics outputs (read engine)
    logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_start_tag;
    logic stat_rd_op_start_valid;
    logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag;
    logic [3:0] stat_rd_op_finish_status;
    logic stat_rd_op_finish_valid;
    logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_start_tag;
    logic [12:0] stat_rd_req_start_len;
    logic stat_rd_req_start_valid;
    logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_finish_tag;
    logic [3:0] stat_rd_req_finish_status;
    logic stat_rd_req_finish_valid;
    logic stat_rd_req_timeout;
    logic stat_rd_op_tbl_full;
    logic stat_rd_no_tags;
    logic stat_rd_tx_limit;
    logic stat_rd_tx_stall;

    // Statistics outputs (write engine)
    logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_start_tag;
    logic stat_wr_op_start_valid;
    logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag;
    logic [3:0] stat_wr_op_finish_status;
    logic stat_wr_op_finish_valid;
    logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_start_tag;
    logic [12:0] stat_wr_req_start_len;
    logic stat_wr_req_start_valid;
    logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag;
    logic [3:0] stat_wr_req_finish_status;
    logic stat_wr_req_finish_valid;
    logic stat_wr_op_tbl_full;
    logic stat_wr_tx_limit;
    logic stat_wr_tx_stall;

    // Unit under test
    taxi_dma_if_pcie_us #(
        .RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
        .RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN),
        .PCIE_TAG_CNT(PCIE_TAG_CNT),
        .RD_OP_TBL_SIZE(RD_OP_TBL_SIZE),
        .RD_TX_LIMIT(RD_TX_LIMIT),
        .RD_TX_FC_EN(RD_TX_FC_EN),
        .RD_CPLH_FC_LIMIT(RD_CPLH_FC_LIMIT),
        .RD_CPLD_FC_LIMIT(RD_CPLD_FC_LIMIT),
        .WR_OP_TBL_SIZE(WR_OP_TBL_SIZE),
        .WR_TX_LIMIT(WR_TX_LIMIT),
        .WR_TX_FC_EN(WR_TX_FC_EN)
    )
    uut (
        .clk(clk),
        .rst(rst),

        /*
         * UltraScale PCIe interface
         */
        .m_axis_rq(m_axis_rq),
        .s_axis_rc(s_axis_rc),

        /*
         * Transmit sequence number input
         */
        .s_axis_rq_seq_num_0(s_axis_rq_seq_num_0),
        .s_axis_rq_seq_num_valid_0(s_axis_rq_seq_num_valid_0),
        .s_axis_rq_seq_num_1(s_axis_rq_seq_num_1),
        .s_axis_rq_seq_num_valid_1(s_axis_rq_seq_num_valid_1),

        /*
         * Transmit flow control
         */
        .pcie_tx_fc_nph_av(pcie_tx_fc_nph_av),
        .pcie_tx_fc_ph_av(pcie_tx_fc_ph_av),
        .pcie_tx_fc_pd_av(pcie_tx_fc_pd_av),

        /*
         * Read descriptor
         * (one interface instance serves both the request and status ports)
         */
        .rd_desc_req(rd_desc),
        .rd_desc_sts(rd_desc),

        /*
         * Write descriptor
         * (one interface instance serves both the request and status ports)
         */
        .wr_desc_req(wr_desc),
        .wr_desc_sts(wr_desc),

        /*
         * RAM interface
         */
        .dma_ram_wr(dma_ram),
        .dma_ram_rd(dma_ram),

        /*
         * Configuration
         */
        .read_enable(read_enable),
        .write_enable(write_enable),
        .ext_tag_en(ext_tag_en),
        .rcb_128b(rcb_128b),
        .requester_id(requester_id),
        .requester_id_en(requester_id_en),
        .max_rd_req_size(max_rd_req_size),
        .max_payload_size(max_payload_size),

        /*
         * Status
         */
        .stat_rd_busy(stat_rd_busy),
        .stat_wr_busy(stat_wr_busy),
        .stat_err_cor(stat_err_cor),
        .stat_err_uncor(stat_err_uncor),

        /*
         * Statistics
         */
        .stat_rd_op_start_tag(stat_rd_op_start_tag),
        .stat_rd_op_start_valid(stat_rd_op_start_valid),
        .stat_rd_op_finish_tag(stat_rd_op_finish_tag),
        .stat_rd_op_finish_status(stat_rd_op_finish_status),
        .stat_rd_op_finish_valid(stat_rd_op_finish_valid),
        .stat_rd_req_start_tag(stat_rd_req_start_tag),
        .stat_rd_req_start_len(stat_rd_req_start_len),
        .stat_rd_req_start_valid(stat_rd_req_start_valid),
        .stat_rd_req_finish_tag(stat_rd_req_finish_tag),
        .stat_rd_req_finish_status(stat_rd_req_finish_status),
        .stat_rd_req_finish_valid(stat_rd_req_finish_valid),
        .stat_rd_req_timeout(stat_rd_req_timeout),
        .stat_rd_op_tbl_full(stat_rd_op_tbl_full),
        .stat_rd_no_tags(stat_rd_no_tags),
        .stat_rd_tx_limit(stat_rd_tx_limit),
        .stat_rd_tx_stall(stat_rd_tx_stall),
        .stat_wr_op_start_tag(stat_wr_op_start_tag),
        .stat_wr_op_start_valid(stat_wr_op_start_valid),
        .stat_wr_op_finish_tag(stat_wr_op_finish_tag),
        .stat_wr_op_finish_status(stat_wr_op_finish_status),
        .stat_wr_op_finish_valid(stat_wr_op_finish_valid),
        .stat_wr_req_start_tag(stat_wr_req_start_tag),
        .stat_wr_req_start_len(stat_wr_req_start_len),
        .stat_wr_req_start_valid(stat_wr_req_start_valid),
        .stat_wr_req_finish_tag(stat_wr_req_finish_tag),
        .stat_wr_req_finish_status(stat_wr_req_finish_status),
        .stat_wr_req_finish_valid(stat_wr_req_finish_valid),
        .stat_wr_op_tbl_full(stat_wr_op_tbl_full),
        .stat_wr_tx_limit(stat_wr_tx_limit),
        .stat_wr_tx_stall(stat_wr_tx_stall)
    );

endmodule
`resetall

View File

@@ -0,0 +1,71 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb simulation settings
TOPLEVEL_LANG = verilog

# simulator selection and waveform dumping (both overridable from the command line)
SIM ?= verilator
WAVES ?= 0

COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps

# source tree locations, relative to this directory
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src

# the HDL top level and the Python test module are both named test_<DUT>
DUT = taxi_dma_if_pcie_us_rd
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# same values under the MODULE/TOPLEVEL variable names
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)

# sources are listed explicitly here rather than through a .f file list
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv
VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_desc_if.sv
VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_ram_if.sv
VERILOG_SOURCES += $(TAXI_SRC_DIR)/axis/rtl/taxi_axis_if.sv

# handle file list files
# process_f_file:  read one .f file and expand its entries relative to the
#                  directory containing that .f file
# process_f_files: expand every *.f entry of a list (recursively); other
#                  entries pass through unchanged
# uniq_base:       drop entries with a duplicate base name, keeping the last
#                  occurrence of each
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))

# module parameters
# Every exported PARAM_<NAME> is passed to the simulator as top-level
# parameter <NAME> by the COMPILE_ARGS rules below.
export PARAM_AXIS_PCIE_DATA_W := 64
export PARAM_AXIS_PCIE_KEEP_W := $(shell expr $(PARAM_AXIS_PCIE_DATA_W) / 32 )
export PARAM_AXIS_PCIE_RQ_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),62,137)
export PARAM_AXIS_PCIE_RC_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),75,161)
export PARAM_RQ_SEQ_NUM_W := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_W)),6,4)
export PARAM_RQ_SEQ_NUM_EN := 1
export PARAM_RAM_SEL_W := 2
export PARAM_RAM_ADDR_W := 16
# max(2, AXIS_PCIE_DATA_W*2/128), computed with shell arithmetic like the
# other derived parameters so that no unversioned "python" executable is
# required on the PATH
export PARAM_RAM_SEGS := $(shell echo "$$(( $(PARAM_AXIS_PCIE_DATA_W) * 2 / 128 > 2 ? $(PARAM_AXIS_PCIE_DATA_W) * 2 / 128 : 2 ))" )
export PARAM_PCIE_TAG_CNT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_W)),256,64)
export PARAM_IMM_EN := 1
export PARAM_IMM_W := $(PARAM_AXIS_PCIE_DATA_W)
export PARAM_LEN_W := 20
export PARAM_TAG_W := 8
export PARAM_OP_TBL_SIZE := $(PARAM_PCIE_TAG_CNT)
export PARAM_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_W)-1) ))" )
export PARAM_TX_FC_EN := 1
export PARAM_CPLH_FC_LIMIT := 512
export PARAM_CPLD_FC_LIMIT := $(shell expr $(PARAM_CPLH_FC_LIMIT) \* 4 )

# translate the PARAM_* variables into simulator-specific parameter overrides
ifeq ($(SIM), icarus)
    PLUSARGS += -fst

    COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
    COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))

    ifeq ($(WAVES), 1)
        COMPILE_ARGS += --trace-fst
        VERILATOR_TRACE = 1
    endif
endif

include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1 @@
../dma_psdp_ram.py

View File

@@ -0,0 +1,404 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import sys
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.triggers import RisingEdge, FallingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiStreamBus
from cocotbext.pcie.core import RootComplex
from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice
from cocotbext.axi.stream import define_stream
from cocotbext.axi.utils import hexdump_str
try:
from dma_psdp_ram import PsdpRamWrite, PsdpRamWriteBus
except ImportError:
# attempt import from current directory
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
try:
from dma_psdp_ram import PsdpRamWrite, PsdpRamWriteBus
finally:
del sys.path[0]
# Descriptor request stream: generates the bus/transaction/source/sink/monitor
# classes for the "req_*" signals. The immediate, id, dest and user fields are
# declared optional.
DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc",
signals=["req_src_addr", "req_src_sel", "req_src_asid", "req_dst_addr", "req_dst_sel", "req_dst_asid", "req_len", "req_tag", "req_valid", "req_ready"],
optional_signals=["req_imm", "req_imm_en", "req_id", "req_dest", "req_user"]
)
# Descriptor status stream: same set of classes for the "sts_*" signals.
# No ready signal is listed, only sts_valid.
DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus",
signals=["sts_tag", "sts_error", "sts_valid"],
optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"]
)
class TB(object):
    """Test harness for the read-side DMA interface.

    Builds a root complex plus an UltraScale+ PCIe device model attached to the
    DUT's RQ/RC streams, a segmented RAM model on ``dut.dma_ram``, and the
    descriptor source / status sink on ``dut.rd_desc``. Two background
    coroutines latch whether ``stat_err_cor`` / ``stat_err_uncor`` ever rose.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.DEBUG)

        # PCIe
        self.rc = RootComplex()

        # MSI: enabled with 32 vectors on PF0, disabled on PF1..PF3
        msi_options = {"pf0_msi_enable": True, "pf0_msi_count": 32}
        for pf in (1, 2, 3):
            msi_options[f"pf{pf}_msi_enable"] = False
            msi_options[f"pf{pf}_msi_count"] = 1

        # MSI-X: disabled, with zeroed table/PBA settings, on all four PFs
        msix_options = {}
        for pf in range(4):
            msix_options[f"pf{pf}_msix_enable"] = False
            msix_options[f"pf{pf}_msix_table_size"] = 0
            msix_options[f"pf{pf}_msix_table_bir"] = 0
            msix_options[f"pf{pf}_msix_table_offset"] = 0x00000000
            msix_options[f"pf{pf}_msix_pba_bir"] = 0
            msix_options[f"pf{pf}_msix_pba_offset"] = 0x00000000

        self.dev = UltraScalePlusPcieDevice(
            # configuration options
            pcie_generation=3,
            # pcie_link_width=2,
            # user_clk_frequency=250e6,
            alignment="dword",
            cq_straddle=False,
            cc_straddle=False,
            rq_straddle=False,
            rc_straddle=False,
            rc_4tlp_straddle=False,
            pf_count=1,
            max_payload_size=1024,
            enable_client_tag=True,
            enable_extended_tag=True,
            enable_parity=False,
            enable_rx_msg_interface=False,
            enable_sriov=False,
            enable_extended_configuration=False,
            **msi_options,
            **msix_options,

            # signals
            user_clk=dut.clk,
            user_reset=dut.rst,

            rq_bus=AxiStreamBus.from_entity(dut.m_axis_rq),
            pcie_rq_seq_num0=dut.s_axis_rq_seq_num_0,
            pcie_rq_seq_num_vld0=dut.s_axis_rq_seq_num_valid_0,
            pcie_rq_seq_num1=dut.s_axis_rq_seq_num_1,
            pcie_rq_seq_num_vld1=dut.s_axis_rq_seq_num_valid_1,

            rc_bus=AxiStreamBus.from_entity(dut.s_axis_rc),

            cfg_max_read_req=dut.max_rd_req_size,

            cfg_fc_sel=0b100,
            cfg_fc_nph=dut.pcie_tx_fc_nph_av,
        )

        self.dev.log.setLevel(logging.DEBUG)

        self.rc.make_port().connect(self.dev)

        # DMA RAM
        ram_bus = PsdpRamWriteBus.from_entity(dut.dma_ram)
        self.dma_ram = PsdpRamWrite(ram_bus, dut.clk, dut.rst, size=2**16)

        # Control
        desc_bus = DescBus.from_entity(dut.rd_desc)
        self.read_desc_source = DescSource(desc_bus, dut.clk, dut.rst)
        status_bus = DescStatusBus.from_entity(dut.rd_desc)
        self.read_desc_status_sink = DescStatusSink(status_bus, dut.clk, dut.rst)

        # configuration inputs start at zero; tests raise `enable` themselves
        dut.requester_id.setimmediatevalue(0)
        dut.requester_id_en.setimmediatevalue(0)
        dut.ext_tag_en.setimmediatevalue(0)
        dut.rcb_128b.setimmediatevalue(0)
        dut.enable.setimmediatevalue(0)

        # monitor error outputs
        self.stat_err_cor_asserted = False
        self.stat_err_uncor_asserted = False
        cocotb.start_soon(self._run_monitor_stat_err_cor())
        cocotb.start_soon(self._run_monitor_stat_err_uncor())

    def set_idle_generator(self, generator=None):
        """Install a pause pattern on the RC source; no-op without a generator."""
        if not generator:
            return
        self.dev.rc_source.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install a pause pattern on the RQ sink and the DMA RAM; no-op without a generator."""
        if not generator:
            return
        self.dev.rq_sink.set_pause_generator(generator())
        self.dma_ram.set_pause_generator(generator())

    async def _run_monitor_stat_err_cor(self):
        """Log and latch every rising edge of ``stat_err_cor``."""
        while True:
            await RisingEdge(self.dut.stat_err_cor)
            self.log.info("stat_err_cor (correctable error) was asserted")
            self.stat_err_cor_asserted = True

    async def _run_monitor_stat_err_uncor(self):
        """Log and latch every rising edge of ``stat_err_uncor``."""
        while True:
            await RisingEdge(self.dut.stat_err_uncor)
            self.log.info("stat_err_uncor (uncorrectable error) was asserted")
            self.stat_err_uncor_asserted = True
async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None):
    """Sweep DMA reads over lengths, PCIe address offsets and RAM offsets.

    For each combination a byte ramp is placed in host memory, the target RAM
    window is pre-filled with 0xaa, one read descriptor is issued, and the
    status tag/error plus the RAM contents (with 8 guard bytes on either side)
    are checked. ``PCIE_OFFSET`` in the environment narrows the sweep to a
    single PCIe offset.
    """
    tb = TB(dut)

    offset_override = os.getenv("PCIE_OFFSET")
    if offset_override is not None:
        pcie_offsets = [int(offset_override)]
    else:
        pcie_offsets = list(range(4))+list(range(4096-4, 4096))

    byte_lanes = tb.dma_ram.byte_lanes
    tag_count = 2**len(tb.read_desc_source.bus.req_tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await FallingEdge(dut.rst)
    await Timer(100, 'ns')

    await tb.rc.enumerate()

    dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
    await dev.enable_device()
    await dev.set_master()

    mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
    mem_base = mem.get_absolute_address(0)

    tb.dut.enable.value = 1

    lengths = list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]
    ram_offsets = range(byte_lanes+1)

    # product() iterates with the right-most argument varying fastest, i.e.
    # length is the outer loop and ram_offset the inner loop
    for length, pcie_offset, ram_offset in itertools.product(lengths, pcie_offsets, ram_offsets):
        tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset)

        pcie_addr = pcie_offset+0x1000
        ram_addr = ram_offset+0x1000
        test_data = bytearray([x % 256 for x in range(length)])
        data_len = len(test_data)

        # source data in host memory
        mem[pcie_addr:pcie_addr+data_len] = test_data

        tb.log.debug("%s", hexdump_str(mem, (pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="PCIe "))

        # background pattern around the destination so stray writes show up
        tb.dma_ram.write(ram_addr-256, b'\xaa'*(data_len+512))

        desc = DescTransaction(
            req_src_addr=mem_base+pcie_addr,
            req_dst_addr=ram_addr,
            req_dst_sel=0,
            req_len=data_len,
            req_tag=cur_tag,
        )
        await tb.read_desc_source.send(desc)

        status = await tb.read_desc_status_sink.recv()

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) == 0

        tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM "))

        guard = b'\xaa'*8
        assert tb.dma_ram.read(ram_addr-8, data_len+16) == guard+test_data+guard

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
async def run_test_read_errors(dut, idle_inserter=None, backpressure_inserter=None):
    """Issue reads that fall outside the allocated host region.

    Three descriptors are sent in turn: a short read and a long read starting
    512 bytes below the region, and a long read starting 512 bytes before its
    end. Each must complete with the matching tag and an error code of 10 or 11.
    """
    tb = TB(dut)

    tag_count = 2**len(tb.read_desc_source.bus.req_tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await FallingEdge(dut.rst)
    await Timer(100, 'ns')

    await tb.rc.enumerate()

    dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
    await dev.enable_device()
    await dev.set_master()

    region_size = 16*1024*1024
    mem = tb.rc.mem_pool.alloc_region(region_size)
    mem_base = mem.get_absolute_address(0)

    tb.dut.enable.value = 1

    # (log message, source address, request length)
    error_cases = (
        ("Test bad DMA read (UR) short", mem_base-512, 8),
        ("Test bad DMA read (UR) first", mem_base-512, 1024),
        ("Test bad DMA read (UR) last", mem_base+region_size-512, 1024),
    )

    for message, src_addr, req_len in error_cases:
        tb.log.info(message)

        desc = DescTransaction(
            req_src_addr=src_addr,
            req_dst_addr=0,
            req_dst_sel=0,
            req_len=req_len,
            req_tag=cur_tag,
        )
        await tb.read_desc_source.send(desc)

        status = await tb.read_desc_status_sink.recv()

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) in {10, 11}

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless 1, 1, 1, 0 pattern used as a pause generator."""
    pattern = [1]*3 + [0]
    return itertools.cycle(pattern)
# Register the cocotb tests only when cocotb exposes a non-None `top`;
# the attribute is looked up with a None default so its absence is harmless.
if getattr(cocotb, 'top', None) is not None:

    for test_fn in (run_test_read, run_test_read_errors):
        # each test is generated twice: without pauses, and with cycle_pause
        # applied to both the idle and the backpressure inserter
        pause_options = [(None, None), (cycle_pause, cycle_pause)]

        factory = TestFactory(test_fn)
        factory.add_option(("idle_inserter", "backpressure_inserter"), pause_options)
        factory.generate_tests()
# cocotb-test
# Directory layout used by the pytest entry point below: this file's directory,
# and the rtl/ and lib/ directories two levels above it (made absolute).
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
# taxi library sources live under lib/taxi/src
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
def process_f_files(files):
    """Expand ``.f`` file lists and de-duplicate the result by basename.

    Entries whose name ends in ``.f`` (case-insensitive) are read as
    whitespace-separated paths relative to the list's own directory and are
    expanded recursively. When two paths share a basename, the entry keeps the
    position of the first occurrence and the path of the last one.

    Returns the resulting paths as a list.
    """
    by_basename = {}

    for entry in files:
        if entry[-2:].lower() != '.f':
            by_basename[os.path.basename(entry)] = entry
            continue

        list_dir = os.path.dirname(entry)
        with open(entry, 'r') as list_file:
            members = list_file.read().split()

        nested = [os.path.join(list_dir, member) for member in members]
        for path in process_f_files(nested):
            by_basename[os.path.basename(path)] = path

    return list(by_basename.values())
@pytest.mark.parametrize("pcie_offset", [*range(4), *range(4096-4, 4096)])
@pytest.mark.parametrize("axis_pcie_data_w", [64, 128, 256, 512])
def test_taxi_dma_if_pcie_us_rd(request, axis_pcie_data_w, pcie_offset):
    """pytest entry point: run the read-interface simulation under verilator.

    One simulation is launched per (data width, PCIe offset) pair. Module
    parameters are derived from the data width and passed both as simulator
    parameters and as ``PARAM_*`` environment variables; the PCIe offset is
    passed through ``PCIE_OFFSET``.
    """
    dut = "taxi_dma_if_pcie_us_rd"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    verilog_sources = process_f_files([
        os.path.join(tests_dir, f"{toplevel}.sv"),
        os.path.join(rtl_dir, f"{dut}.sv"),
        os.path.join(rtl_dir, "taxi_dma_desc_if.sv"),
        os.path.join(rtl_dir, "taxi_dma_ram_if.sv"),
        os.path.join(taxi_src_dir, "axis", "rtl", "taxi_axis_if.sv"),
    ])

    # values that other parameters are derived from
    narrow = axis_pcie_data_w < 512
    rq_user_w = 62 if narrow else 137
    rc_user_w = 75 if narrow else 161
    legacy_rq = rq_user_w == 60
    rq_seq_num_w = 4 if legacy_rq else 6
    pcie_tag_cnt = 64 if legacy_rq else 256
    cplh_fc_limit = 512

    parameters = {
        'AXIS_PCIE_DATA_W': axis_pcie_data_w,
        'AXIS_PCIE_KEEP_W': axis_pcie_data_w // 32,
        'AXIS_PCIE_RQ_USER_W': rq_user_w,
        'AXIS_PCIE_RC_USER_W': rc_user_w,
        'RQ_SEQ_NUM_W': rq_seq_num_w,
        'RQ_SEQ_NUM_EN': 1,
        'RAM_SEL_W': 2,
        'RAM_ADDR_W': 16,
        'RAM_SEGS': max(2, axis_pcie_data_w*2 // 128),
        'PCIE_TAG_CNT': pcie_tag_cnt,
        'LEN_W': 20,
        'TAG_W': 8,
        'OP_TBL_SIZE': pcie_tag_cnt,
        'TX_LIMIT': 2**(rq_seq_num_w-1),
        'TX_FC_EN': 1,
        'CPLH_FC_LIMIT': cplh_fc_limit,
        'CPLD_FC_LIMIT': cplh_fc_limit*4,
    }

    extra_env = {}
    for name, value in parameters.items():
        extra_env[f'PARAM_{name}'] = str(value)
    extra_env['PCIE_OFFSET'] = str(pcie_offset)

    # one build directory per parametrized test id, with the brackets removed
    build_name = request.node.name.replace('[', '-').replace(']', '')
    sim_build = os.path.join(tests_dir, "sim_build", build_name)

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,218 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* UltraScale PCIe DMA interface testbench
*/
// Testbench wrapper around taxi_dma_if_pcie_us_rd: declares the clock/reset,
// the interface instances and the plain signals that the instance "uut" is
// connected to. The wrapper has no ports and contains no stimulus of its own;
// everything declared here is left to be driven/observed from outside.
module test_taxi_dma_if_pcie_us_rd #
(
/* verilator lint_off WIDTHTRUNC */
// PCIe AXI stream geometry; the user sideband widths depend on the data width
parameter AXIS_PCIE_DATA_W = 64,
parameter AXIS_PCIE_KEEP_W = AXIS_PCIE_DATA_W / 32,
parameter AXIS_PCIE_RQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 62 : 137,
parameter AXIS_PCIE_RC_USER_W = AXIS_PCIE_DATA_W < 512 ? 75 : 161,
// RQ sequence number width follows the RQ user width
parameter RQ_SEQ_NUM_W = AXIS_PCIE_RQ_USER_W == 60 ? 4 : 6,
parameter logic RQ_SEQ_NUM_EN = 1'b1,
// DMA RAM geometry
parameter RAM_SEL_W = 2,
parameter RAM_ADDR_W = 16,
parameter RAM_SEGS = AXIS_PCIE_DATA_W > 256 ? AXIS_PCIE_DATA_W / 128 : 2,
parameter PCIE_TAG_CNT = AXIS_PCIE_RQ_USER_W == 60 ? 64 : 256,
// NOTE(review): IMM_EN and IMM_W are not referenced anywhere in this module;
// the descriptor interface below hard-codes .IMM_EN(1'b0)
parameter logic IMM_EN = 1,
parameter IMM_W = AXIS_PCIE_DATA_W,
parameter LEN_W = 20,
parameter TAG_W = 8,
// parameters forwarded unchanged to the uut
parameter OP_TBL_SIZE = PCIE_TAG_CNT,
parameter TX_LIMIT = 2**(RQ_SEQ_NUM_W-1),
parameter logic TX_FC_EN = 1'b1,
parameter CPLH_FC_LIMIT = 512,
parameter CPLD_FC_LIMIT = CPLH_FC_LIMIT*4
/* verilator lint_on WIDTHTRUNC */
)
();
// derived constants: the RAM data path is twice the PCIe stream width and is
// split evenly across RAM_SEGS segments
localparam PCIE_ADDR_W = 64;
localparam RAM_DATA_W = AXIS_PCIE_DATA_W*2;
localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS;
localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8;
// per-segment address width: total address bits minus the byte-offset bits
// covered by one full-width RAM word
localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W);
logic clk;
logic rst;
// requester request stream (connected to the uut port m_axis_rq)
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1'b1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.LAST_EN(1'b1),
.ID_EN(1'b0),
.DEST_EN(1'b0),
.USER_EN(1'b1),
.USER_W(AXIS_PCIE_RQ_USER_W)
) m_axis_rq();
// requester completion stream (connected to the uut port s_axis_rc)
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1'b1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.LAST_EN(1'b1),
.ID_EN(1'b0),
.DEST_EN(1'b0),
.USER_EN(1'b1),
.USER_W(AXIS_PCIE_RC_USER_W)
) s_axis_rc();
// transmit sequence number inputs and non-posted header credit input
logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_0;
logic s_axis_rq_seq_num_valid_0;
logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_1;
logic s_axis_rq_seq_num_valid_1;
logic [7:0] pcie_tx_fc_nph_av;
// read descriptor interface: source address is PCIe-sized, destination is a
// RAM address with a select field; immediates are disabled
taxi_dma_desc_if #(
.SRC_ADDR_W(PCIE_ADDR_W),
.SRC_SEL_EN(1'b0),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(RAM_ADDR_W),
.DST_SEL_EN(1'b1),
.DST_SEL_W(RAM_SEL_W),
.DST_ASID_EN(1'b0),
.IMM_EN(1'b0),
.LEN_W(LEN_W),
.TAG_W(TAG_W),
.ID_EN(1'b0),
.DEST_EN(1'b0),
.USER_EN(1'b0)
) rd_desc();
// segmented DMA RAM interface (connected to the uut port dma_ram_wr)
taxi_dma_ram_if #(
.SEGS(RAM_SEGS),
.SEG_ADDR_W(RAM_SEG_ADDR_W),
.SEG_DATA_W(RAM_SEG_DATA_W),
.SEG_BE_W(RAM_SEG_BE_W)
) dma_ram();
// configuration signals
logic enable;
logic ext_tag_en;
logic rcb_128b;
logic [15:0] requester_id;
logic requester_id_en;
logic [2:0] max_rd_req_size;
// status signals
logic stat_busy;
logic stat_err_cor;
logic stat_err_uncor;
// statistics signals; tag widths follow the operation table and PCIe tag counts
logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_op_start_tag;
logic stat_rd_op_start_valid;
logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag;
logic [3:0] stat_rd_op_finish_status;
logic stat_rd_op_finish_valid;
logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_start_tag;
logic [12:0] stat_rd_req_start_len;
logic stat_rd_req_start_valid;
logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_finish_tag;
logic [3:0] stat_rd_req_finish_status;
logic stat_rd_req_finish_valid;
logic stat_rd_req_timeout;
logic stat_rd_op_tbl_full;
logic stat_rd_no_tags;
logic stat_rd_tx_limit;
logic stat_rd_tx_stall;
// unit under test; every port is wired to the same-named signal above, except
// that the single rd_desc interface serves both descriptor ports
taxi_dma_if_pcie_us_rd #(
.RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
.RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN),
.PCIE_TAG_CNT(PCIE_TAG_CNT),
.OP_TBL_SIZE(OP_TBL_SIZE),
.TX_LIMIT(TX_LIMIT),
.TX_FC_EN(TX_FC_EN),
.CPLH_FC_LIMIT(CPLH_FC_LIMIT),
.CPLD_FC_LIMIT(CPLD_FC_LIMIT)
)
uut (
.clk(clk),
.rst(rst),
/*
* UltraScale PCIe interface
*/
.m_axis_rq(m_axis_rq),
.s_axis_rc(s_axis_rc),
/*
* Transmit sequence number input
*/
.s_axis_rq_seq_num_0(s_axis_rq_seq_num_0),
.s_axis_rq_seq_num_valid_0(s_axis_rq_seq_num_valid_0),
.s_axis_rq_seq_num_1(s_axis_rq_seq_num_1),
.s_axis_rq_seq_num_valid_1(s_axis_rq_seq_num_valid_1),
/*
* Transmit flow control
*/
.pcie_tx_fc_nph_av(pcie_tx_fc_nph_av),
/*
* Read descriptor
*/
// request and status ports share the one rd_desc interface instance
.rd_desc_req(rd_desc),
.rd_desc_sts(rd_desc),
/*
* RAM interface
*/
.dma_ram_wr(dma_ram),
/*
* Configuration
*/
.enable(enable),
.ext_tag_en(ext_tag_en),
.rcb_128b(rcb_128b),
.requester_id(requester_id),
.requester_id_en(requester_id_en),
.max_rd_req_size(max_rd_req_size),
/*
* Status
*/
.stat_busy(stat_busy),
.stat_err_cor(stat_err_cor),
.stat_err_uncor(stat_err_uncor),
/*
* Statistics
*/
.stat_rd_op_start_tag(stat_rd_op_start_tag),
.stat_rd_op_start_valid(stat_rd_op_start_valid),
.stat_rd_op_finish_tag(stat_rd_op_finish_tag),
.stat_rd_op_finish_status(stat_rd_op_finish_status),
.stat_rd_op_finish_valid(stat_rd_op_finish_valid),
.stat_rd_req_start_tag(stat_rd_req_start_tag),
.stat_rd_req_start_len(stat_rd_req_start_len),
.stat_rd_req_start_valid(stat_rd_req_start_valid),
.stat_rd_req_finish_tag(stat_rd_req_finish_tag),
.stat_rd_req_finish_status(stat_rd_req_finish_status),
.stat_rd_req_finish_valid(stat_rd_req_finish_valid),
.stat_rd_req_timeout(stat_rd_req_timeout),
.stat_rd_op_tbl_full(stat_rd_op_tbl_full),
.stat_rd_no_tags(stat_rd_no_tags),
.stat_rd_tx_limit(stat_rd_tx_limit),
.stat_rd_tx_stall(stat_rd_tx_stall)
);
endmodule
`resetall

View File

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb build configuration; SIM and WAVES use ?= so they can be overridden
# from the command line or the environment
TOPLEVEL_LANG = verilog

SIM ?= verilator
WAVES ?= 0

COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps

# source tree locations, relative to this directory
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src

# the Python test module and the HDL toplevel wrapper are both named test_$(DUT)
DUT = taxi_dma_if_pcie_us_wr
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# un-prefixed aliases of the two names above
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)

VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv
VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_desc_if.sv
VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_ram_if.sv
VERILOG_SOURCES += $(TAXI_SRC_DIR)/axis/rtl/taxi_axis_if.sv

# handle file list files
# process_f_file:  read one .f list with cat, prefix every entry with the
#                  list's own directory, then expand the result recursively
# process_f_files: expand each .f entry of $1; other entries pass through
# uniq_base:       when several entries share a basename, keep only the last
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))

# module parameters
# every PARAM_<NAME> variable is exported and also forwarded to the simulator
# as an override of toplevel parameter <NAME> (see the SIM branches below)
export PARAM_AXIS_PCIE_DATA_W := 64
export PARAM_AXIS_PCIE_KEEP_W := $(shell expr $(PARAM_AXIS_PCIE_DATA_W) / 32 )
# RQ user sideband width: 137 when the data width is 512, 62 otherwise
export PARAM_AXIS_PCIE_RQ_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),62,137)
# 4-bit sequence numbers only when the RQ user width is 60, otherwise 6
export PARAM_RQ_SEQ_NUM_W := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_W)),6,4)
export PARAM_RQ_SEQ_NUM_EN := 1
export PARAM_RAM_SEL_W := 2
export PARAM_RAM_ADDR_W := 16
# max(2, DATA_W*2/128); evaluated with shell arithmetic (like PARAM_TX_LIMIT)
# so that no interpreter named "python" has to be present on PATH
export PARAM_RAM_SEGS := $(shell echo "$$(( $(PARAM_AXIS_PCIE_DATA_W) * 2 / 128 > 2 ? $(PARAM_AXIS_PCIE_DATA_W) * 2 / 128 : 2 ))" )
export PARAM_PCIE_TAG_CNT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_W)),256,64)
export PARAM_IMM_EN := 1
export PARAM_IMM_W := $(PARAM_AXIS_PCIE_DATA_W)
export PARAM_LEN_W := 20
export PARAM_TAG_W := 8
# both are 2**(RQ_SEQ_NUM_W-1)
export PARAM_OP_TBL_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_W)-1) ))" )
export PARAM_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_W)-1) ))" )
export PARAM_TX_FC_EN := 1

ifeq ($(SIM), icarus)
# icarus: FST dump via plusarg, parameters passed as -P <toplevel>.<NAME>=<value>
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
# verilator: parameters passed as -G<NAME>=<value>; tracing only when WAVES=1
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif

include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1 @@
../dma_psdp_ram.py

View File

@@ -0,0 +1,315 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import sys
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.triggers import RisingEdge, FallingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiStreamBus
from cocotbext.pcie.core import RootComplex
from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice
from cocotbext.axi.stream import define_stream
from cocotbext.axi.utils import hexdump_str
try:
from dma_psdp_ram import PsdpRamRead, PsdpRamReadBus
except ImportError:
# attempt import from current directory
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
try:
from dma_psdp_ram import PsdpRamRead, PsdpRamReadBus
finally:
del sys.path[0]
# Descriptor request stream: generates the bus/transaction/source/sink/monitor
# classes for the "req_*" signals. The immediate, id, dest and user fields are
# declared optional.
DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc",
signals=["req_src_addr", "req_src_sel", "req_src_asid", "req_dst_addr", "req_dst_sel", "req_dst_asid", "req_len", "req_tag", "req_valid", "req_ready"],
optional_signals=["req_imm", "req_imm_en", "req_id", "req_dest", "req_user"]
)
# Descriptor status stream: same set of classes for the "sts_*" signals.
# No ready signal is listed, only sts_valid.
DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus",
signals=["sts_tag", "sts_error", "sts_valid"],
optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"]
)
class TB(object):
    """Test harness for the write-side DMA interface.

    Builds a root complex plus an UltraScale+ PCIe device model attached to the
    DUT's outgoing RQ stream, ties the DUT's ``s_axis_rq`` input to zero, and
    creates a segmented RAM model on ``dut.dma_ram`` together with the
    descriptor source / status sink on ``dut.wr_desc``.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.DEBUG)

        # PCIe
        self.rc = RootComplex()

        # MSI: enabled with 32 vectors on PF0, disabled on PF1..PF3
        msi_options = {"pf0_msi_enable": True, "pf0_msi_count": 32}
        for pf in (1, 2, 3):
            msi_options[f"pf{pf}_msi_enable"] = False
            msi_options[f"pf{pf}_msi_count"] = 1

        # MSI-X: disabled, with zeroed table/PBA settings, on all four PFs
        msix_options = {}
        for pf in range(4):
            msix_options[f"pf{pf}_msix_enable"] = False
            msix_options[f"pf{pf}_msix_table_size"] = 0
            msix_options[f"pf{pf}_msix_table_bir"] = 0
            msix_options[f"pf{pf}_msix_table_offset"] = 0x00000000
            msix_options[f"pf{pf}_msix_pba_bir"] = 0
            msix_options[f"pf{pf}_msix_pba_offset"] = 0x00000000

        self.dev = UltraScalePlusPcieDevice(
            # configuration options
            pcie_generation=3,
            # pcie_link_width=2,
            # user_clk_frequency=250e6,
            alignment="dword",
            cq_straddle=False,
            cc_straddle=False,
            rq_straddle=False,
            rc_straddle=False,
            rc_4tlp_straddle=False,
            pf_count=1,
            max_payload_size=1024,
            enable_client_tag=True,
            enable_extended_tag=True,
            enable_parity=False,
            enable_rx_msg_interface=False,
            enable_sriov=False,
            enable_extended_configuration=False,
            **msi_options,
            **msix_options,

            # signals
            user_clk=dut.clk,
            user_reset=dut.rst,

            rq_bus=AxiStreamBus.from_entity(dut.m_axis_rq),
            pcie_rq_seq_num0=dut.s_axis_rq_seq_num_0,
            pcie_rq_seq_num_vld0=dut.s_axis_rq_seq_num_valid_0,
            pcie_rq_seq_num1=dut.s_axis_rq_seq_num_1,
            pcie_rq_seq_num_vld1=dut.s_axis_rq_seq_num_valid_1,

            cfg_max_payload=dut.max_payload_size,

            cfg_fc_sel=0b100,
            cfg_fc_ph=dut.pcie_tx_fc_ph_av,
            cfg_fc_pd=dut.pcie_tx_fc_pd_av,
        )

        self.dev.log.setLevel(logging.DEBUG)

        self.rc.make_port().connect(self.dev)

        # tie off RQ input
        rq_in = dut.s_axis_rq
        rq_in.tdata.setimmediatevalue(0)
        rq_in.tkeep.setimmediatevalue(0)
        rq_in.tlast.setimmediatevalue(0)
        rq_in.tuser.setimmediatevalue(0)
        rq_in.tvalid.setimmediatevalue(0)

        # DMA RAM
        ram_bus = PsdpRamReadBus.from_entity(dut.dma_ram)
        self.dma_ram = PsdpRamRead(ram_bus, dut.clk, dut.rst, size=2**16)

        # Control
        desc_bus = DescBus.from_entity(dut.wr_desc)
        self.write_desc_source = DescSource(desc_bus, dut.clk, dut.rst)
        status_bus = DescStatusBus.from_entity(dut.wr_desc)
        self.write_desc_status_sink = DescStatusSink(status_bus, dut.clk, dut.rst)

        # configuration inputs start at zero; tests raise `enable` themselves
        dut.requester_id.setimmediatevalue(0)
        dut.requester_id_en.setimmediatevalue(0)
        dut.enable.setimmediatevalue(0)

    def set_idle_generator(self, generator=None):
        """Accepted for symmetry with the backpressure hook; currently does nothing."""
        pass
        # if generator:
        #     self.dma_ram.r_channel.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install a pause pattern on the RQ sink and the DMA RAM; no-op without a generator."""
        if not generator:
            return
        self.dev.rq_sink.set_pause_generator(generator())
        self.dma_ram.set_pause_generator(generator())
async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None):
    """Sweep DMA writes over lengths, PCIe address offsets and RAM offsets.

    For each combination the RAM is filled with a 0x55 background and a byte
    ramp, the host memory window is filled with 0xaa, one write descriptor is
    issued, and after a short delay the status tag/error and the host memory
    contents (with one guard byte on either side) are checked. ``PCIE_OFFSET``
    in the environment narrows the sweep to a single PCIe offset.
    """
    tb = TB(dut)

    offset_override = os.getenv("PCIE_OFFSET")
    if offset_override is not None:
        pcie_offsets = [int(offset_override)]
    else:
        pcie_offsets = list(range(4))+list(range(4096-4, 4096))

    byte_lanes = tb.dma_ram.byte_lanes
    tag_count = 2**len(tb.write_desc_source.bus.req_tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await FallingEdge(dut.rst)
    await Timer(100, 'ns')

    await tb.rc.enumerate()

    dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
    await dev.enable_device()
    await dev.set_master()

    mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
    mem_base = mem.get_absolute_address(0)

    tb.dut.enable.value = 1

    lengths = list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]
    ram_offsets = range(byte_lanes+1)

    # product() iterates with the right-most argument varying fastest, i.e.
    # length is the outer loop and ram_offset the inner loop
    for length, pcie_offset, ram_offset in itertools.product(lengths, pcie_offsets, ram_offsets):
        tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset)

        pcie_addr = pcie_offset+0x1000
        ram_addr = ram_offset+0x1000
        test_data = bytearray([x % 256 for x in range(length)])
        data_len = len(test_data)

        # background patterns: 0x55 in RAM, 0xaa in host memory
        tb.dma_ram.write(ram_addr & 0xffff80, b'\x55'*(data_len+256))
        window_start = pcie_addr-128
        mem[window_start:window_start+data_len+256] = b'\xaa'*(data_len+256)
        # payload to be transferred
        tb.dma_ram.write(ram_addr, test_data)

        tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM "))

        desc = DescTransaction(
            req_dst_addr=mem_base+pcie_addr,
            req_src_addr=ram_addr,
            req_src_sel=0,
            req_len=data_len,
            req_tag=cur_tag,
        )
        await tb.write_desc_source.send(desc)

        status = await tb.write_desc_status_sink.recv()
        # extra delay, growing with the transfer length, before host memory is inspected
        await Timer(100 + (length // byte_lanes), 'ns')

        tb.log.info("status: %s", status)

        assert int(status.sts_tag) == cur_tag
        assert int(status.sts_error) == 0

        tb.log.debug("%s", hexdump_str(mem, (pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="PCIe "))

        guard = b'\xaa'
        assert mem[pcie_addr-1:pcie_addr+data_len+1] == guard+test_data+guard

        cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless 1, 1, 1, 0 pattern used as a pause generator."""
    pattern = [1]*3 + [0]
    return itertools.cycle(pattern)
# Register the cocotb test only when cocotb exposes a non-None `top`;
# the attribute is looked up with a None default so its absence is harmless.
if getattr(cocotb, 'top', None) is not None:

    # the test is generated twice: without pauses, and with cycle_pause
    # applied to both the idle and the backpressure inserter
    pause_options = [(None, None), (cycle_pause, cycle_pause)]

    factory = TestFactory(run_test_write)
    factory.add_option(("idle_inserter", "backpressure_inserter"), pause_options)
    factory.generate_tests()
# cocotb-test
# Directory layout used by the pytest entry point below: this file's directory,
# and the rtl/ and lib/ directories two levels above it (made absolute).
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
# taxi library sources live under lib/taxi/src
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
def process_f_files(files):
    """Expand ``.f`` file lists and de-duplicate the result by basename.

    Entries whose name ends in ``.f`` (case-insensitive) are read as
    whitespace-separated paths relative to the list's own directory and are
    expanded recursively. When two paths share a basename, the entry keeps the
    position of the first occurrence and the path of the last one.

    Returns the resulting paths as a list.
    """
    by_basename = {}

    for entry in files:
        if entry[-2:].lower() != '.f':
            by_basename[os.path.basename(entry)] = entry
            continue

        list_dir = os.path.dirname(entry)
        with open(entry, 'r') as list_file:
            members = list_file.read().split()

        nested = [os.path.join(list_dir, member) for member in members]
        for path in process_f_files(nested):
            by_basename[os.path.basename(path)] = path

    return list(by_basename.values())
@pytest.mark.parametrize("pcie_offset", [*range(4), *range(4096-4, 4096)])
@pytest.mark.parametrize("axis_pcie_data_w", [64, 128, 256, 512])
def test_taxi_dma_if_pcie_us_wr(request, axis_pcie_data_w, pcie_offset):
    """pytest entry point: run the write-interface simulation under verilator.

    One simulation is launched per (data width, PCIe offset) pair. Module
    parameters are derived from the data width and passed both as simulator
    parameters and as ``PARAM_*`` environment variables; the PCIe offset is
    passed through ``PCIE_OFFSET``.
    """
    dut = "taxi_dma_if_pcie_us_wr"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    verilog_sources = process_f_files([
        os.path.join(tests_dir, f"{toplevel}.sv"),
        os.path.join(rtl_dir, f"{dut}.sv"),
        os.path.join(rtl_dir, "taxi_dma_desc_if.sv"),
        os.path.join(rtl_dir, "taxi_dma_ram_if.sv"),
        os.path.join(taxi_src_dir, "axis", "rtl", "taxi_axis_if.sv"),
    ])

    # values that other parameters are derived from
    rq_user_w = 62 if axis_pcie_data_w < 512 else 137
    legacy_rq = rq_user_w == 60
    rq_seq_num_w = 4 if legacy_rq else 6
    half_seq_space = 2**(rq_seq_num_w-1)

    parameters = {
        'AXIS_PCIE_DATA_W': axis_pcie_data_w,
        'AXIS_PCIE_KEEP_W': axis_pcie_data_w // 32,
        'AXIS_PCIE_RQ_USER_W': rq_user_w,
        'RQ_SEQ_NUM_W': rq_seq_num_w,
        'RQ_SEQ_NUM_EN': 1,
        'RAM_SEL_W': 2,
        'RAM_ADDR_W': 16,
        'RAM_SEGS': max(2, axis_pcie_data_w*2 // 128),
        'PCIE_TAG_CNT': 64 if legacy_rq else 256,
        'IMM_EN': 1,
        'IMM_W': axis_pcie_data_w,
        'LEN_W': 20,
        'TAG_W': 8,
        'OP_TBL_SIZE': half_seq_space,
        'TX_LIMIT': half_seq_space,
        'TX_FC_EN': 1,
    }

    extra_env = {}
    for name, value in parameters.items():
        extra_env[f'PARAM_{name}'] = str(value)
    extra_env['PCIE_OFFSET'] = str(pcie_offset)

    # one build directory per parametrized test id, with the brackets removed
    build_name = request.node.name.replace('[', '-').replace(']', '')
    sim_build = os.path.join(tests_dir, "sim_build", build_name)

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,195 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * UltraScale PCIe DMA interface (write) testbench
 *
 * Port-less wrapper that instantiates taxi_dma_if_pcie_us_wr as "uut" and
 * declares the interfaces and signals attached to it.  Nothing is driven
 * from within this module; stimulus is presumably applied externally by the
 * cocotb test (confirm against the Python test module).
 */
module test_taxi_dma_if_pcie_us_wr #
(
    /* verilator lint_off WIDTHTRUNC */
    // PCIe AXI stream data width
    parameter AXIS_PCIE_DATA_W = 64,
    // PCIe AXI stream tkeep width (one bit per 32 data bits)
    parameter AXIS_PCIE_KEEP_W = AXIS_PCIE_DATA_W / 32,
    // RQ tuser width
    parameter AXIS_PCIE_RQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 62 : 137,
    // RQ sequence number width
    parameter RQ_SEQ_NUM_W = AXIS_PCIE_RQ_USER_W == 60 ? 4 : 6,
    // RQ sequence number tracking enable
    parameter logic RQ_SEQ_NUM_EN = 1'b1,
    // RAM select width
    parameter RAM_SEL_W = 2,
    // RAM address width
    parameter RAM_ADDR_W = 16,
    // RAM segment count; same formula as the cocotb test runner,
    // max(2, AXIS_PCIE_DATA_W*2 / 128), so that a stand-alone build elaborates
    // the same segmentation as the parametrized regression
    parameter RAM_SEGS = AXIS_PCIE_DATA_W > 128 ? AXIS_PCIE_DATA_W*2 / 128 : 2,
    // PCIe tag count (not passed to the write-only DUT below)
    parameter PCIE_TAG_CNT = AXIS_PCIE_RQ_USER_W == 60 ? 64 : 256,
    // Immediate data enable for the descriptor interface
    parameter logic IMM_EN = 1'b1,
    // Immediate data width
    parameter IMM_W = AXIS_PCIE_DATA_W,
    // Descriptor length field width
    parameter LEN_W = 20,
    // Descriptor tag field width
    parameter TAG_W = 8,
    // Operation table size
    parameter OP_TBL_SIZE = 2**(RQ_SEQ_NUM_W-1),
    // In-flight transmit limit
    parameter TX_LIMIT = 2**(RQ_SEQ_NUM_W-1),
    // Transmit flow control enable
    parameter logic TX_FC_EN = 1'b1
    /* verilator lint_on WIDTHTRUNC */
)
();

// Derived widths: the RAM interface is twice as wide as the PCIe data path
// and is divided evenly across RAM_SEGS segments.
localparam PCIE_ADDR_W = 64;

localparam RAM_DATA_W = AXIS_PCIE_DATA_W*2;
localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS;
localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8;
localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W);

logic clk;
logic rst;

/*
 * UltraScale PCIe interface (RQ input and RQ output share one configuration)
 */
taxi_axis_if #(
    .DATA_W(AXIS_PCIE_DATA_W),
    .KEEP_EN(1'b1),
    .KEEP_W(AXIS_PCIE_KEEP_W),
    .LAST_EN(1'b1),
    .ID_EN(1'b0),
    .DEST_EN(1'b0),
    .USER_EN(1'b1),
    .USER_W(AXIS_PCIE_RQ_USER_W)
) s_axis_rq(), m_axis_rq();

/*
 * Transmit sequence number input
 */
logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_0;
logic                    s_axis_rq_seq_num_valid_0;
logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_1;
logic                    s_axis_rq_seq_num_valid_1;

/*
 * Transmit flow control
 */
logic [7:0]  pcie_tx_fc_ph_av;
logic [11:0] pcie_tx_fc_pd_av;

/*
 * Write descriptor (source: RAM address with select, destination: PCIe address)
 */
taxi_dma_desc_if #(
    .SRC_ADDR_W(RAM_ADDR_W),
    .SRC_SEL_EN(1'b1),
    .SRC_SEL_W(RAM_SEL_W),
    .SRC_ASID_EN(1'b0),
    .DST_ADDR_W(PCIE_ADDR_W),
    .DST_SEL_EN(1'b0),
    .DST_ASID_EN(1'b0),
    .IMM_EN(IMM_EN),
    .IMM_W(IMM_W),
    .LEN_W(LEN_W),
    .TAG_W(TAG_W),
    .ID_EN(1'b0),
    .DEST_EN(1'b0),
    .USER_EN(1'b0)
) wr_desc();

/*
 * RAM interface
 */
taxi_dma_ram_if #(
    .SEGS(RAM_SEGS),
    .SEG_ADDR_W(RAM_SEG_ADDR_W),
    .SEG_DATA_W(RAM_SEG_DATA_W),
    .SEG_BE_W(RAM_SEG_BE_W)
) dma_ram();

/*
 * Configuration
 *
 * NOTE(review): ext_tag_en, rcb_128b and max_rd_req_size are declared but
 * not connected to the write-only DUT below.  They are retained because the
 * accompanying test may still reference them by name - confirm before
 * removing.
 */
logic        enable;
logic        ext_tag_en;
logic        rcb_128b;
logic [15:0] requester_id;
logic        requester_id_en;
logic [2:0]  max_rd_req_size;
logic [2:0]  max_payload_size;

/*
 * Status
 */
logic stat_busy;

/*
 * Statistics (tags are indices sized by OP_TBL_SIZE)
 */
logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_op_start_tag;
logic                           stat_wr_op_start_valid;
logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag;
logic [3:0]                     stat_wr_op_finish_status;
logic                           stat_wr_op_finish_valid;
logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_req_start_tag;
logic [12:0]                    stat_wr_req_start_len;
logic                           stat_wr_req_start_valid;
logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag;
logic [3:0]                     stat_wr_req_finish_status;
logic                           stat_wr_req_finish_valid;
logic                           stat_wr_op_tbl_full;
logic                           stat_wr_tx_limit;
logic                           stat_wr_tx_stall;

taxi_dma_if_pcie_us_wr #(
    .RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
    .RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN),
    .OP_TBL_SIZE(OP_TBL_SIZE),
    .TX_LIMIT(TX_LIMIT),
    .TX_FC_EN(TX_FC_EN)
)
uut (
    .clk(clk),
    .rst(rst),

    /*
     * UltraScale PCIe interface
     */
    .m_axis_rq(m_axis_rq),
    .s_axis_rq(s_axis_rq),

    /*
     * Transmit sequence number input
     */
    .s_axis_rq_seq_num_0(s_axis_rq_seq_num_0),
    .s_axis_rq_seq_num_valid_0(s_axis_rq_seq_num_valid_0),
    .s_axis_rq_seq_num_1(s_axis_rq_seq_num_1),
    .s_axis_rq_seq_num_valid_1(s_axis_rq_seq_num_valid_1),

    /*
     * Transmit flow control
     */
    .pcie_tx_fc_ph_av(pcie_tx_fc_ph_av),
    .pcie_tx_fc_pd_av(pcie_tx_fc_pd_av),

    /*
     * Write descriptor (one interface instance serves both request and status)
     */
    .wr_desc_req(wr_desc),
    .wr_desc_sts(wr_desc),

    /*
     * RAM interface
     */
    .dma_ram_rd(dma_ram),

    /*
     * Configuration
     */
    .enable(enable),
    .requester_id(requester_id),
    .requester_id_en(requester_id_en),
    .max_payload_size(max_payload_size),

    /*
     * Status
     */
    .stat_busy(stat_busy),

    /*
     * Statistics
     */
    .stat_wr_op_start_tag(stat_wr_op_start_tag),
    .stat_wr_op_start_valid(stat_wr_op_start_valid),
    .stat_wr_op_finish_tag(stat_wr_op_finish_tag),
    .stat_wr_op_finish_status(stat_wr_op_finish_status),
    .stat_wr_op_finish_valid(stat_wr_op_finish_valid),
    .stat_wr_req_start_tag(stat_wr_req_start_tag),
    .stat_wr_req_start_len(stat_wr_req_start_len),
    .stat_wr_req_start_valid(stat_wr_req_start_valid),
    .stat_wr_req_finish_tag(stat_wr_req_finish_tag),
    .stat_wr_req_finish_status(stat_wr_req_finish_status),
    .stat_wr_req_finish_valid(stat_wr_req_finish_valid),
    .stat_wr_op_tbl_full(stat_wr_op_tbl_full),
    .stat_wr_tx_limit(stat_wr_tx_limit),
    .stat_wr_tx_stall(stat_wr_tx_stall)
);

endmodule
`resetall