axis: Add AXI stream pipeline FIFO module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-02-03 16:35:52 -08:00
parent 47e4658b55
commit 9590811570
4 changed files with 701 additions and 0 deletions

View File

@@ -0,0 +1,222 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2021-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4-Stream pipeline FIFO
*/
module taxi_axis_pipeline_fifo #
(
// Number of registers in pipeline
parameter LENGTH = 2
)
(
input wire logic clk,
input wire logic rst,
/*
* AXI4-Stream input (sink)
*/
taxi_axis_if.snk s_axis,
/*
* AXI4-Stream output (source)
*/
taxi_axis_if.src m_axis
);
// extract parameters
localparam DATA_W = s_axis.DATA_W;
localparam logic KEEP_EN = s_axis.KEEP_EN && m_axis.KEEP_EN;
localparam KEEP_W = s_axis.KEEP_W;
localparam logic STRB_EN = s_axis.STRB_EN && m_axis.STRB_EN;
localparam logic LAST_EN = s_axis.LAST_EN && m_axis.LAST_EN;
localparam logic ID_EN = s_axis.ID_EN && m_axis.ID_EN;
localparam ID_W = s_axis.ID_W;
localparam logic DEST_EN = s_axis.DEST_EN && m_axis.DEST_EN;
localparam DEST_W = s_axis.DEST_W;
localparam logic USER_EN = s_axis.USER_EN && m_axis.USER_EN;
localparam USER_W = s_axis.USER_W;
// check configuration
if (m_axis.DATA_W != DATA_W)
$fatal(0, "Error: Interface DATA_W parameter mismatch (instance %m)");
if (KEEP_EN && m_axis.KEEP_W != KEEP_W)
$fatal(0, "Error: Interface KEEP_W parameter mismatch (instance %m)");
localparam FIFO_AW = LENGTH < 2 ? 3 : $clog2(LENGTH*4+1);
taxi_axis_if #(.DATA_W(DATA_W), .KEEP_W(KEEP_W), .ID_W(ID_W), .DEST_W(DEST_W), .USER_W(USER_W)) axis_pipe[LENGTH+1]();
for (genvar n = 0; n < LENGTH; n = n + 1) begin : stage
(* shreg_extract = "no" *)
logic [DATA_W-1:0] axis_tdata_reg = 0;
(* shreg_extract = "no" *)
logic [KEEP_W-1:0] axis_tkeep_reg = 0;
(* shreg_extract = "no" *)
logic axis_tvalid_reg = 0;
(* shreg_extract = "no" *)
logic axis_tready_reg = 0;
(* shreg_extract = "no" *)
logic axis_tlast_reg = 0;
(* shreg_extract = "no" *)
logic [ID_W-1:0] axis_tid_reg = 0;
(* shreg_extract = "no" *)
logic [DEST_W-1:0] axis_tdest_reg = 0;
(* shreg_extract = "no" *)
logic [USER_W-1:0] axis_tuser_reg = 0;
assign axis_pipe[n+1].tdata = axis_tdata_reg;
assign axis_pipe[n+1].tkeep = axis_tkeep_reg;
assign axis_pipe[n+1].tvalid = axis_tvalid_reg;
assign axis_pipe[n+1].tlast = axis_tlast_reg;
assign axis_pipe[n+1].tid = axis_tid_reg;
assign axis_pipe[n+1].tdest = axis_tdest_reg;
assign axis_pipe[n+1].tuser = axis_tuser_reg;
assign axis_pipe[n].tready = axis_tready_reg;
always_ff @(posedge clk) begin
axis_tdata_reg <= axis_pipe[n].tdata;
axis_tkeep_reg <= axis_pipe[n].tkeep;
axis_tvalid_reg <= axis_pipe[n].tvalid;
axis_tlast_reg <= axis_pipe[n].tlast;
axis_tid_reg <= axis_pipe[n].tid;
axis_tdest_reg <= axis_pipe[n].tdest;
axis_tuser_reg <= axis_pipe[n].tuser;
axis_tready_reg <= axis_pipe[n+1].tready;
if (rst) begin
axis_tvalid_reg <= 1'b0;
axis_tready_reg <= 1'b0;
end
end
end
if (LENGTH > 0) begin : fifo
assign axis_pipe[0].tdata = s_axis.tdata;
assign axis_pipe[0].tkeep = s_axis.tkeep;
assign axis_pipe[0].tvalid = s_axis.tvalid & s_axis.tready;
assign axis_pipe[0].tlast = s_axis.tlast;
assign axis_pipe[0].tid = s_axis.tid;
assign axis_pipe[0].tdest = s_axis.tdest;
assign axis_pipe[0].tuser = s_axis.tuser;
assign s_axis.tready = axis_pipe[0].tready;
wire [DATA_W-1:0] m_axis_tdata_int = axis_pipe[LENGTH].tdata;
wire [KEEP_W-1:0] m_axis_tkeep_int = axis_pipe[LENGTH].tkeep;
wire m_axis_tvalid_int = axis_pipe[LENGTH].tvalid;
wire m_axis_tready_int;
wire m_axis_tlast_int = axis_pipe[LENGTH].tlast;
wire [ID_W-1:0] m_axis_tid_int = axis_pipe[LENGTH].tid;
wire [DEST_W-1:0] m_axis_tdest_int = axis_pipe[LENGTH].tdest;
wire [USER_W-1:0] m_axis_tuser_int = axis_pipe[LENGTH].tuser;
assign axis_pipe[LENGTH].tready = m_axis_tready_int;
// output datapath logic
logic [DATA_W-1:0] m_axis_tdata_reg = '0;
logic [KEEP_W-1:0] m_axis_tkeep_reg = '0;
logic m_axis_tvalid_reg = 1'b0;
logic m_axis_tlast_reg = 1'b0;
logic [ID_W-1:0] m_axis_tid_reg = '0;
logic [DEST_W-1:0] m_axis_tdest_reg = '0;
logic [USER_W-1:0] m_axis_tuser_reg = '0;
logic [FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = 0;
logic [FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = 0;
logic out_fifo_half_full_reg = 1'b0;
wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {FIFO_AW{1'b0}}});
wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg;
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [DATA_W-1:0] out_fifo_tdata[2**FIFO_AW-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [KEEP_W-1:0] out_fifo_tkeep[2**FIFO_AW-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic out_fifo_tlast[2**FIFO_AW-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [ID_W-1:0] out_fifo_tid[2**FIFO_AW-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [DEST_W-1:0] out_fifo_tdest[2**FIFO_AW-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [USER_W-1:0] out_fifo_tuser[2**FIFO_AW-1:0];
assign m_axis_tready_int = !out_fifo_half_full_reg;
assign m_axis.tdata = m_axis_tdata_reg;
assign m_axis.tkeep = KEEP_EN ? m_axis_tkeep_reg : '1;
assign m_axis.tvalid = m_axis_tvalid_reg;
assign m_axis.tlast = LAST_EN ? m_axis_tlast_reg : 1'b1;
assign m_axis.tid = ID_EN ? m_axis_tid_reg : '0;
assign m_axis.tdest = DEST_EN ? m_axis_tdest_reg : '0;
assign m_axis.tuser = USER_EN ? m_axis_tuser_reg : '0;
always_ff @(posedge clk) begin
m_axis_tvalid_reg <= m_axis_tvalid_reg && !m_axis.tready;
out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(FIFO_AW-1);
if (!out_fifo_full && m_axis_tvalid_int) begin
out_fifo_tdata[out_fifo_wr_ptr_reg[FIFO_AW-1:0]] <= m_axis_tdata_int;
out_fifo_tkeep[out_fifo_wr_ptr_reg[FIFO_AW-1:0]] <= m_axis_tkeep_int;
out_fifo_tlast[out_fifo_wr_ptr_reg[FIFO_AW-1:0]] <= m_axis_tlast_int;
out_fifo_tid[out_fifo_wr_ptr_reg[FIFO_AW-1:0]] <= m_axis_tid_int;
out_fifo_tdest[out_fifo_wr_ptr_reg[FIFO_AW-1:0]] <= m_axis_tdest_int;
out_fifo_tuser[out_fifo_wr_ptr_reg[FIFO_AW-1:0]] <= m_axis_tuser_int;
out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1;
end
if (!out_fifo_empty && (!m_axis_tvalid_reg || m_axis.tready)) begin
m_axis_tdata_reg <= out_fifo_tdata[out_fifo_rd_ptr_reg[FIFO_AW-1:0]];
m_axis_tkeep_reg <= out_fifo_tkeep[out_fifo_rd_ptr_reg[FIFO_AW-1:0]];
m_axis_tvalid_reg <= 1'b1;
m_axis_tlast_reg <= out_fifo_tlast[out_fifo_rd_ptr_reg[FIFO_AW-1:0]];
m_axis_tid_reg <= out_fifo_tid[out_fifo_rd_ptr_reg[FIFO_AW-1:0]];
m_axis_tdest_reg <= out_fifo_tdest[out_fifo_rd_ptr_reg[FIFO_AW-1:0]];
m_axis_tuser_reg <= out_fifo_tuser[out_fifo_rd_ptr_reg[FIFO_AW-1:0]];
out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1;
end
if (rst) begin
out_fifo_wr_ptr_reg <= 0;
out_fifo_rd_ptr_reg <= 0;
m_axis_tvalid_reg <= 1'b0;
end
end
end else begin
// bypass
assign m_axis.tdata = s_axis.tdata;
assign m_axis.tkeep = KEEP_EN ? s_axis.tkeep : '1;
assign m_axis.tvalid = s_axis.tvalid;
assign m_axis.tlast = LAST_EN ? s_axis.tlast : 1'b1;
assign m_axis.tid = ID_EN ? s_axis.tid : '0;
assign m_axis.tdest = DEST_EN ? s_axis.tdest : '0;
assign m_axis.tuser = USER_EN ? s_axis.tuser : '0;
assign s_axis.tready = m_axis.tready;
end
endmodule
`resetall

View File

@@ -0,0 +1,58 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2021-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
DUT = taxi_axis_pipeline_fifo
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += ../../../rtl/axis/$(DUT).sv
VERILOG_SOURCES += ../../../rtl/axis/taxi_axis_if.sv
# handle file list files
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
export PARAM_DATA_W := 8
export PARAM_KEEP_EN := $(shell echo $$(( $(PARAM_DATA_W) > 8 )))
export PARAM_KEEP_W := $(shell echo $$(( ( $(PARAM_DATA_W) + 7 ) / 8 )))
export PARAM_STRB_EN := 0
export PARAM_LAST_EN := 1
export PARAM_ID_EN := 1
export PARAM_ID_W := 8
export PARAM_DEST_EN := 1
export PARAM_DEST_W := 8
export PARAM_USER_EN := 1
export PARAM_USER_W := 1
export PARAM_LENGTH := 2
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1,347 @@
#!/usr/bin/env python
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2021-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import random
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge
from cocotb.regression import TestFactory
from cocotbext.axi import AxiStreamBus, AxiStreamFrame, AxiStreamSource, AxiStreamSink
class TB(object):
def __init__(self, dut):
self.dut = dut
self.log = logging.getLogger("cocotb.tb")
self.log.setLevel(logging.DEBUG)
cocotb.start_soon(Clock(dut.clk, 10, units="ns").start())
self.source = AxiStreamSource(AxiStreamBus.from_entity(dut.s_axis), dut.clk, dut.rst)
self.sink = AxiStreamSink(AxiStreamBus.from_entity(dut.m_axis), dut.clk, dut.rst)
def set_idle_generator(self, generator=None):
if generator:
self.source.set_pause_generator(generator())
def set_backpressure_generator(self, generator=None):
if generator:
self.sink.set_pause_generator(generator())
async def reset(self):
self.dut.rst.setimmediatevalue(0)
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
self.dut.rst.value = 1
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
self.dut.rst.value = 0
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
async def run_test(dut, payload_lengths=None, payload_data=None, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
id_count = 2**len(tb.source.bus.tid)
cur_id = 1
await tb.reset()
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
test_frames = []
for test_data in [payload_data(x) for x in payload_lengths()]:
test_frame = AxiStreamFrame(test_data)
test_frame.tid = cur_id
test_frame.tdest = cur_id
test_frames.append(test_frame)
await tb.source.send(test_frame)
cur_id = (cur_id + 1) % id_count
for test_frame in test_frames:
rx_frame = await tb.sink.recv()
assert rx_frame.tdata == test_frame.tdata
assert rx_frame.tid == test_frame.tid
assert rx_frame.tdest == test_frame.tdest
assert not rx_frame.tuser
assert tb.sink.empty()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_tuser_assert(dut):
tb = TB(dut)
await tb.reset()
test_data = bytearray(itertools.islice(itertools.cycle(range(256)), 32))
test_frame = AxiStreamFrame(test_data, tuser=1)
await tb.source.send(test_frame)
rx_frame = await tb.sink.recv()
assert rx_frame.tdata == test_data
assert rx_frame.tuser
assert tb.sink.empty()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_init_sink_pause(dut):
tb = TB(dut)
await tb.reset()
tb.sink.pause = True
test_data = bytearray(itertools.islice(itertools.cycle(range(256)), 32))
test_frame = AxiStreamFrame(test_data)
await tb.source.send(test_frame)
for k in range(64):
await RisingEdge(dut.clk)
tb.sink.pause = False
rx_frame = await tb.sink.recv()
assert rx_frame.tdata == test_data
assert not rx_frame.tuser
assert tb.sink.empty()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_init_sink_pause_reset(dut):
tb = TB(dut)
await tb.reset()
tb.sink.pause = True
test_data = bytearray(itertools.islice(itertools.cycle(range(256)), 32))
test_frame = AxiStreamFrame(test_data)
await tb.source.send(test_frame)
for k in range(64):
await RisingEdge(dut.clk)
await tb.reset()
tb.sink.pause = False
for k in range(64):
await RisingEdge(dut.clk)
assert tb.sink.empty()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_overflow(dut):
tb = TB(dut)
await tb.reset()
tb.sink.pause = True
test_data = bytearray(itertools.islice(itertools.cycle(range(256)), 2048))
test_frame = AxiStreamFrame(test_data)
await tb.source.send(test_frame)
for k in range(2048):
await RisingEdge(dut.clk)
tb.sink.pause = False
rx_frame = await tb.sink.recv()
assert rx_frame.tdata == test_data
assert not rx_frame.tuser
assert tb.sink.empty()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
byte_lanes = tb.source.byte_lanes
id_count = 2**len(tb.source.bus.tid)
cur_id = 1
await tb.reset()
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
test_frames = []
for k in range(128):
length = random.randint(1, byte_lanes*16)
test_data = bytearray(itertools.islice(itertools.cycle(range(256)), length))
test_frame = AxiStreamFrame(test_data)
test_frame.tid = cur_id
test_frame.tdest = cur_id
test_frames.append(test_frame)
await tb.source.send(test_frame)
cur_id = (cur_id + 1) % id_count
for test_frame in test_frames:
rx_frame = await tb.sink.recv()
assert rx_frame.tdata == test_frame.tdata
assert rx_frame.tid == test_frame.tid
assert rx_frame.tdest == test_frame.tdest
assert not rx_frame.tuser
assert tb.sink.empty()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
def cycle_pause():
return itertools.cycle([1, 1, 1, 0])
def size_list():
data_width = len(cocotb.top.m_axis.tdata)
byte_width = data_width // 8
return list(range(1, byte_width*4+1))+[512]+[1]*64
def incrementing_payload(length):
return bytearray(itertools.islice(itertools.cycle(range(256)), length))
if cocotb.SIM_NAME:
factory = TestFactory(run_test)
factory.add_option("payload_lengths", [size_list])
factory.add_option("payload_data", [incrementing_payload])
factory.add_option("idle_inserter", [None, cycle_pause])
factory.add_option("backpressure_inserter", [None, cycle_pause])
factory.generate_tests()
for test in [
run_test_tuser_assert,
run_test_init_sink_pause,
run_test_init_sink_pause_reset,
run_test_overflow
]:
factory = TestFactory(test)
factory.generate_tests()
factory = TestFactory(run_stress_test)
factory.add_option("idle_inserter", [None, cycle_pause])
factory.add_option("backpressure_inserter", [None, cycle_pause])
factory.generate_tests()
# cocotb-test
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', '..', 'rtl'))
def process_f_files(files):
lst = {}
for f in files:
if f[-2:].lower() == '.f':
with open(f, 'r') as fp:
l = fp.read().split()
for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]):
lst[os.path.basename(f)] = f
else:
lst[os.path.basename(f)] = f
return list(lst.values())
@pytest.mark.parametrize("data_w", [8, 16])
@pytest.mark.parametrize("length", list(range(17)))
def test_taxi_axis_pipeline_fifo(request, length, data_w):
dut = "taxi_axis_pipeline_fifo"
module = os.path.splitext(os.path.basename(__file__))[0]
toplevel = module
verilog_sources = [
os.path.join(tests_dir, f"{toplevel}.sv"),
os.path.join(rtl_dir, "axis", f"{dut}.sv"),
os.path.join(rtl_dir, "axis", "taxi_axis_if.sv"),
]
verilog_sources = process_f_files(verilog_sources)
parameters = {}
parameters['DATA_W'] = data_w
parameters['KEEP_EN'] = int(parameters['DATA_W'] > 8)
parameters['KEEP_W'] = (parameters['DATA_W'] + 7) // 8
parameters['LAST_EN'] = 1
parameters['ID_EN'] = 1
parameters['ID_W'] = 8
parameters['DEST_EN'] = 1
parameters['DEST_W'] = 8
parameters['USER_EN'] = 1
parameters['USER_W'] = 1
parameters['LENGTH'] = length
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
sim_build = os.path.join(tests_dir, "sim_build",
request.node.name.replace('[', '-').replace(']', ''))
cocotb_test.simulator.run(
simulator="verilator",
python_search=[tests_dir],
verilog_sources=verilog_sources,
toplevel=toplevel,
module=module,
parameters=parameters,
sim_build=sim_build,
extra_env=extra_env,
)

View File

@@ -0,0 +1,74 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4-Stream pipeline FIFO testbench
*/
module test_taxi_axis_pipeline_fifo #
(
/* verilator lint_off WIDTHTRUNC */
parameter DATA_W = 8,
parameter logic KEEP_EN = (DATA_W>8),
parameter KEEP_W = ((DATA_W+7)/8),
parameter logic STRB_EN = 1'b0,
parameter logic LAST_EN = 1'b1,
parameter logic ID_EN = 1'b0,
parameter ID_W = 8,
parameter logic DEST_EN = 1'b0,
parameter DEST_W = 8,
parameter logic USER_EN = 1'b1,
parameter USER_W = 1,
parameter LENGTH = 2
/* verilator lint_on WIDTHTRUNC */
)
();
logic clk;
logic rst;
taxi_axis_if #(
.DATA_W(DATA_W),
.KEEP_EN(KEEP_EN),
.KEEP_W(KEEP_W),
.STRB_EN(STRB_EN),
.LAST_EN(LAST_EN),
.ID_EN(ID_EN),
.ID_W(ID_W),
.DEST_EN(DEST_EN),
.DEST_W(DEST_W),
.USER_EN(USER_EN),
.USER_W(USER_W)
) s_axis(), m_axis();
taxi_axis_pipeline_fifo #(
.LENGTH(LENGTH)
)
uut (
.clk(clk),
.rst(rst),
/*
* AXI4-Stream input (sink)
*/
.s_axis(s_axis),
/*
* AXI4-Stream output (source)
*/
.m_axis(m_axis)
);
endmodule
`resetall