stats: Add statistics counter module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-03-25 00:02:58 -07:00
parent fd3e23ef6e
commit d2b0fa4693
4 changed files with 618 additions and 0 deletions

View File

@@ -0,0 +1,268 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2021-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Statistics counter
*/
module taxi_stats_counter #
(
    // Statistics counter (bits)
    parameter STAT_COUNT_W = 32,
    // Pipeline length
    parameter PIPELINE = 2
)
(
    input  wire logic    clk,
    input  wire logic    rst,

    /*
     * Statistics increment input
     */
    taxi_axis_if.snk     s_axis_stat,

    /*
     * AXI Lite register interface
     */
    taxi_axil_if.wr_slv  s_axil_wr,
    taxi_axil_if.rd_slv  s_axil_rd
);

// Summary:
//  - one STAT_COUNT_W-bit counter per stream ID (tid) is kept in a memory
//  - each accepted beat on s_axis_stat adds tdata to the counter selected by tid
//  - AXI lite reads return counter contents; AXI lite writes are accepted and discarded
//  - after power-up and after reset, every counter is rewritten to zero before
//    any request is accepted

// widths taken from the attached interfaces
localparam STAT_INC_W = s_axis_stat.DATA_W;
localparam STAT_ID_W = s_axis_stat.ID_W;

localparam AXIL_ADDR_W = s_axil_rd.ADDR_W;
localparam AXIL_DATA_W = s_axil_rd.DATA_W;

// log2 of the byte stride between consecutive counters in the AXI lite address
// map (the wider of one counter and one AXI lite data word)
localparam ID_SHIFT = $clog2(((AXIL_DATA_W > STAT_COUNT_W ? AXIL_DATA_W : STAT_COUNT_W)+7)/8);
// log2 of the number of bytes in one AXI lite data word
localparam WORD_SELECT_SHIFT = $clog2(AXIL_DATA_W/8);
// number of address bits selecting an AXI lite word inside one counter
// (minimum of 1 so the vectors below never have zero width)
localparam WORD_SELECT_W = STAT_COUNT_W > AXIL_DATA_W ? $clog2((STAT_COUNT_W+7)/8) - $clog2(AXIL_DATA_W/8) : 1;

// check configuration
if (AXIL_ADDR_W < STAT_ID_W+ID_SHIFT)
    $fatal(0, "Error: AXI lite address width too narrow (instance %m)");

if (PIPELINE < 2)
    $fatal(0, "Error: PIPELINE must be at least 2 (instance %m)");

// counter zeroing state: init_reg is set while the memory is being cleared,
// init_ptr_reg is the address being cleared
logic init_reg = 1'b1, init_next;
logic [STAT_ID_W-1:0] init_ptr_reg = 0, init_ptr_next;

logic op_acc_pipe_hazard;
logic stage_active;

// one valid bit per pipeline stage for each operation type
logic [PIPELINE-1:0] op_axil_read_pipe_reg = 0, op_axil_read_pipe_next;
logic [PIPELINE-1:0] op_acc_pipe_reg = 0, op_acc_pipe_next;

// per-stage sideband carried along with the operation
logic [STAT_ID_W-1:0] mem_addr_pipeline_reg[PIPELINE], mem_addr_pipeline_next[PIPELINE];
logic [WORD_SELECT_W-1:0] axil_shift_pipeline_reg[PIPELINE], axil_shift_pipeline_next[PIPELINE];
logic [STAT_INC_W-1:0] stat_inc_pipeline_reg[PIPELINE], stat_inc_pipeline_next[PIPELINE];

logic s_axis_stat_tready_reg = 1'b0, s_axis_stat_tready_next;

logic s_axil_awready_reg = 0, s_axil_awready_next;
logic s_axil_wready_reg = 0, s_axil_wready_next;
logic s_axil_bvalid_reg = 0, s_axil_bvalid_next;
logic s_axil_arready_reg = 0, s_axil_arready_next;
logic [AXIL_DATA_W-1:0] s_axil_rdata_reg = 0, s_axil_rdata_next;
logic s_axil_rvalid_reg = 0, s_axil_rvalid_next;

// counter storage
// NOTE(review): the ramstyle attribute is a vendor synthesis hint; confirm its
// effect on the target toolchain
(* ramstyle = "no_rw_check" *)
logic [STAT_COUNT_W-1:0] mem[2**STAT_ID_W];

logic [STAT_ID_W-1:0] mem_rd_addr;
logic [STAT_ID_W-1:0] mem_wr_addr;
logic [STAT_COUNT_W-1:0] mem_wr_data;
logic mem_wr_en;

// registered memory read data, followed by PIPELINE-1 delay registers
logic [STAT_COUNT_W-1:0] mem_read_data_reg = 0;
logic [STAT_COUNT_W-1:0] mem_read_data_pipeline_reg[PIPELINE-1:1];

assign s_axis_stat.tready = s_axis_stat_tready_reg;

assign s_axil_wr.awready = s_axil_awready_reg;
assign s_axil_wr.wready = s_axil_wready_reg;
assign s_axil_wr.bresp = 2'b00;
assign s_axil_wr.bvalid = s_axil_bvalid_reg;

assign s_axil_rd.arready = s_axil_arready_reg;
assign s_axil_rd.rdata = s_axil_rdata_reg;
assign s_axil_rd.rresp = 2'b00;
assign s_axil_rd.rvalid = s_axil_rvalid_reg;

// split the read address into counter index and word-within-counter index
wire [STAT_ID_W-1:0] s_axil_araddr_id = STAT_ID_W'(s_axil_rd.araddr >> ID_SHIFT);
wire [WORD_SELECT_W-1:0] s_axil_araddr_shift = WORD_SELECT_W'(s_axil_rd.araddr >> WORD_SELECT_SHIFT);

initial begin
    // break up loop to work around iteration termination
    for (integer i = 0; i < 2**STAT_ID_W; i = i + 2**(STAT_ID_W/2)) begin
        for (integer j = i; j < i + 2**(STAT_ID_W/2); j = j + 1) begin
            mem[j] = 0;
        end
    end

    for (integer i = 0; i < PIPELINE; i = i + 1) begin
        mem_addr_pipeline_reg[i] = 0;
        axil_shift_pipeline_reg[i] = 0;
        stat_inc_pipeline_reg[i] = 0;
    end
end

always_comb begin
    init_next = init_reg;
    init_ptr_next = init_ptr_reg;

    // by default every operation advances one stage and stage 0 empties
    op_axil_read_pipe_next = PIPELINE'({op_axil_read_pipe_reg, 1'b0});
    op_acc_pipe_next = PIPELINE'({op_acc_pipe_reg, 1'b0});

    mem_addr_pipeline_next[0] = 0;
    axil_shift_pipeline_next[0] = 0;
    stat_inc_pipeline_next[0] = 0;
    for (integer j = 1; j < PIPELINE; j = j + 1) begin
        mem_addr_pipeline_next[j] = mem_addr_pipeline_reg[j-1];
        axil_shift_pipeline_next[j] = axil_shift_pipeline_reg[j-1];
        stat_inc_pipeline_next[j] = stat_inc_pipeline_reg[j-1];
    end

    s_axis_stat_tready_next = 1'b0;

    s_axil_awready_next = 1'b0;
    s_axil_wready_next = 1'b0;
    // use this module's own write/read ports for the response handshakes; a
    // bare "s_axil" is not declared here and would only resolve (upwards) when
    // the parent happens to contain an instance with that name
    s_axil_bvalid_next = s_axil_bvalid_reg && !s_axil_wr.bready;

    s_axil_arready_next = 1'b0;
    s_axil_rdata_next = s_axil_rdata_reg;
    s_axil_rvalid_next = s_axil_rvalid_reg && !s_axil_rd.rready;

    mem_rd_addr = 0;
    mem_wr_addr = mem_addr_pipeline_reg[PIPELINE-1];
    mem_wr_data = mem_read_data_pipeline_reg[PIPELINE-1] + STAT_COUNT_W'(stat_inc_pipeline_reg[PIPELINE-1]);
    mem_wr_en = 0;

    // hazard: some stage already holds an operation on the counter that the
    // incoming increment targets
    op_acc_pipe_hazard = 1'b0;
    stage_active = 1'b0;

    for (integer j = 0; j < PIPELINE; j = j + 1) begin
        stage_active = op_axil_read_pipe_reg[j] || op_acc_pipe_reg[j];
        op_acc_pipe_hazard = op_acc_pipe_hazard || (stage_active && mem_addr_pipeline_reg[j] == s_axis_stat.tid);
    end

    // discard writes
    // (address and data are accepted together and acknowledged; nothing is stored)
    if (s_axil_wr.awvalid && s_axil_wr.wvalid && (!s_axil_wr.bvalid || s_axil_wr.bready) && (!s_axil_wr.awready && !s_axil_wr.wready)) begin
        s_axil_awready_next = 1'b1;
        s_axil_wready_next = 1'b1;
        s_axil_bvalid_next = 1'b1;
    end

    // pipeline stage 0 - accept request
    // priority: counter zeroing, then AXI lite read, then increment
    if (init_reg) begin
        // zero all counters
        init_ptr_next = init_ptr_reg + 1;

        mem_wr_addr = init_ptr_reg;
        mem_wr_data = 0;
        mem_wr_en = 1'b1;

        // last address reached, leave init state
        if (&init_ptr_reg) begin
            init_next = 1'b0;
        end
    end else if (s_axil_rd.arvalid && (!s_axil_rd.rvalid || s_axil_rd.rready) && op_axil_read_pipe_reg == 0) begin
        // AXIL read
        // (only started when no other read is in the pipeline)
        op_axil_read_pipe_next[0] = 1'b1;

        s_axil_arready_next = 1'b1;

        mem_rd_addr = s_axil_araddr_id;
        mem_addr_pipeline_next[0] = s_axil_araddr_id;
        axil_shift_pipeline_next[0] = s_axil_araddr_shift;
    end else if (s_axis_stat.tvalid && !s_axis_stat.tready && !op_acc_pipe_hazard) begin
        // accumulate
        // (!tready prevents taking the same beat again in the cycle where the
        // registered tready completes the handshake)
        op_acc_pipe_next[0] = 1'b1;

        s_axis_stat_tready_next = 1'b1;

        stat_inc_pipeline_next[0] = s_axis_stat.tdata;

        mem_rd_addr = s_axis_stat.tid;
        mem_addr_pipeline_next[0] = s_axis_stat.tid;
    end

    // read complete, perform operation
    if (op_acc_pipe_reg[PIPELINE-1]) begin
        // accumulate
        mem_wr_addr = mem_addr_pipeline_reg[PIPELINE-1];
        mem_wr_data = mem_read_data_pipeline_reg[PIPELINE-1] + STAT_COUNT_W'(stat_inc_pipeline_reg[PIPELINE-1]);
        mem_wr_en = 1'b1;
    end else if (op_axil_read_pipe_reg[PIPELINE-1]) begin
        // AXIL read
        s_axil_rvalid_next = 1'b1;
        s_axil_rdata_next = 0;

        if (STAT_COUNT_W > AXIL_DATA_W) begin
            // counter wider than the bus: return the addressed word of the counter
            s_axil_rdata_next = AXIL_DATA_W'(mem_read_data_pipeline_reg[PIPELINE-1] >> axil_shift_pipeline_reg[PIPELINE-1]*AXIL_DATA_W);
        end else begin
            s_axil_rdata_next = AXIL_DATA_W'(mem_read_data_pipeline_reg[PIPELINE-1]);
        end
    end
end

always_ff @(posedge clk) begin
    init_reg <= init_next;
    init_ptr_reg <= init_ptr_next;

    op_axil_read_pipe_reg <= op_axil_read_pipe_next;
    op_acc_pipe_reg <= op_acc_pipe_next;

    s_axis_stat_tready_reg <= s_axis_stat_tready_next;

    s_axil_awready_reg <= s_axil_awready_next;
    s_axil_wready_reg <= s_axil_wready_next;
    s_axil_bvalid_reg <= s_axil_bvalid_next;
    s_axil_arready_reg <= s_axil_arready_next;
    s_axil_rdata_reg <= s_axil_rdata_next;
    s_axil_rvalid_reg <= s_axil_rvalid_next;

    for (integer i = 0; i < PIPELINE; i = i + 1) begin
        mem_addr_pipeline_reg[i] <= mem_addr_pipeline_next[i];
        axil_shift_pipeline_reg[i] <= axil_shift_pipeline_next[i];
        stat_inc_pipeline_reg[i] <= stat_inc_pipeline_next[i];
    end

    // memory write port
    if (mem_wr_en) begin
        mem[mem_wr_addr] <= mem_wr_data;
    end

    // memory read port with registered output, then delay line
    mem_read_data_reg <= mem[mem_rd_addr];
    mem_read_data_pipeline_reg[1] <= mem_read_data_reg;
    for (integer i = 2; i < PIPELINE; i = i + 1) begin
        mem_read_data_pipeline_reg[i] <= mem_read_data_pipeline_reg[i-1];
    end

    // synchronous reset: restart counter zeroing and drop in-flight operations
    // and handshake state (data-path registers are not reset)
    if (rst) begin
        init_reg <= 1'b1;
        init_ptr_reg <= 0;

        op_axil_read_pipe_reg <= 0;
        op_acc_pipe_reg <= 0;

        s_axis_stat_tready_reg <= 1'b0;

        s_axil_awready_reg <= 1'b0;
        s_axil_wready_reg <= 1'b0;
        s_axil_bvalid_reg <= 1'b0;
        s_axil_arready_reg <= 1'b0;
        s_axil_rvalid_reg <= 1'b0;
    end
end

endmodule
`resetall

View File

@@ -0,0 +1,53 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2021-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb simulation makefile for the taxi_stats_counter testbench

TOPLEVEL_LANG = verilog

# simulator and waveform dump selection (both overridable from the command line)
SIM ?= verilator
WAVES ?= 0

COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps

# the HDL wrapper and the Python test module are both named test_<DUT>
DUT = taxi_stats_counter
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# same values under the MODULE/TOPLEVEL variable names
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)

VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += ../../../rtl/stats/$(DUT).sv
VERILOG_SOURCES += ../../../rtl/axis/taxi_axis_if.sv
VERILOG_SOURCES += ../../../rtl/axi/taxi_axil_if.sv

# handle file list files
# process_f_file: read file list $1 and expand its entries relative to its directory
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
# process_f_files: replace every *.f entry in $1 by its (recursively expanded) contents
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
# uniq_base: drop entries whose base name repeats, keeping the last occurrence
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))

# module parameters
# (every PARAM_* variable is exported and forwarded to the simulator below)
export PARAM_STAT_COUNT_W := 32
export PARAM_PIPELINE := 2
export PARAM_STAT_INC_W := 16
export PARAM_STAT_ID_W := 8
export PARAM_AXIL_DATA_W := 32
# address width = ID bits + log2(bytes per counter slot); a slot is as wide as
# the wider of one counter and one AXI lite data word, matching the width
# check inside the DUT
export PARAM_AXIL_ADDR_W := $(shell python -c "print($(PARAM_STAT_ID_W) + ((max($(PARAM_STAT_COUNT_W), $(PARAM_AXIL_DATA_W))+7)//8-1).bit_length())")

ifeq ($(SIM), icarus)
    PLUSARGS += -fst

    # icarus: override parameters as -P <toplevel>.<NAME>=<value>
    COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
    # verilator: override parameters as -G<NAME>=<value>
    COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))

    ifeq ($(WAVES), 1)
        COMPILE_ARGS += --trace-fst
        VERILATOR_TRACE = 1
    endif
endif

include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1,228 @@
#!/usr/bin/env python
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2021-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import random
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.clock import Clock
from cocotb.queue import Queue
from cocotb.triggers import RisingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiLiteBus, AxiLiteMaster
from cocotbext.axi import AxiStreamBus, AxiStreamSource, AxiStreamFrame
class TB(object):
    """Testbench harness: starts the clock and attaches the bus drivers.

    Attributes set by the constructor:
      dut          -- the simulator handle passed in
      log          -- logger named "cocotb.tb", level DEBUG
      stat_source  -- AxiStreamSource built from dut.s_axis_stat
      axil_master  -- AxiLiteMaster built from dut.s_axil
    """

    def __init__(self, dut):
        self.dut = dut

        logger = logging.getLogger("cocotb.tb")
        logger.setLevel(logging.DEBUG)
        self.log = logger

        # 10 ns clock on dut.clk
        clock = Clock(dut.clk, 10, units="ns")
        cocotb.start_soon(clock.start())

        stat_bus = AxiStreamBus.from_entity(dut.s_axis_stat)
        self.stat_source = AxiStreamSource(stat_bus, dut.clk, dut.rst)

        axil_bus = AxiLiteBus.from_entity(dut.s_axil)
        self.axil_master = AxiLiteMaster(axil_bus, dut.clk, dut.rst)

    def set_idle_generator(self, generator=None):
        """Install a fresh pause generator on each channel driven by the testbench.

        `generator` is a zero-argument callable; it is called once per channel.
        Nothing happens when it is None (or otherwise falsy).
        """
        if not generator:
            return

        driven = (
            self.stat_source,
            self.axil_master.write_if.aw_channel,
            self.axil_master.write_if.w_channel,
            self.axil_master.read_if.ar_channel,
        )
        for channel in driven:
            channel.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install a fresh pause generator on the AXI lite B and R channels.

        `generator` is a zero-argument callable; it is called once per channel.
        Nothing happens when it is None (or otherwise falsy).
        """
        if not generator:
            return

        responses = (
            self.axil_master.write_if.b_channel,
            self.axil_master.read_if.r_channel,
        )
        for channel in responses:
            channel.set_pause_generator(generator())

    async def cycle_reset(self):
        """Pulse dut.rst high for two clock edges, with two edges before and after."""
        rst = self.dut.rst
        clk = self.dut.clk

        rst.setimmediatevalue(0)
        for _ in range(2):
            await RisingEdge(clk)

        rst.value = 1
        for _ in range(2):
            await RisingEdge(clk)

        rst.value = 0
        for _ in range(2):
            await RisingEdge(clk)
async def run_test_acc(dut, idle_inserter=None, backpressure_inserter=None):
    """Directed accumulate test.

    Sends ten rounds of increments, each round adding k to counter k for
    k = 0..9, then reads the first ten counters over AXI lite and checks that
    counter k holds 10*k.
    """
    tb = TB(dut)

    # one read word per counter: at least one full AXI lite data word wide
    bus_bytes = tb.axil_master.read_if.byte_lanes
    word_bytes = max(dut.STAT_COUNT_W.value // 8, bus_bytes)

    await tb.cycle_reset()

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await Timer(4000, 'ns')

    for _, stat_id in itertools.product(range(10), range(10)):
        frame = AxiStreamFrame([stat_id], tid=stat_id)
        await tb.stat_source.send(frame)

    await Timer(1000, 'ns')

    data = await tb.axil_master.read_words(0, 10, ws=word_bytes)

    print(data)

    for stat_id in range(10):
        expected = stat_id*10
        assert data[stat_id] == expected

    for _ in range(2):
        await RisingEdge(dut.clk)
async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None):
    """Randomized accumulate test.

    Sixteen concurrent workers each send 128 increments with a random value and
    a random counter ID, recording every (id, value) pair in a queue.  When all
    workers are done, a reference total per counter is built from the queue and
    compared against every counter read back over AXI lite.
    """

    tb = TB(dut)

    # one read word per counter: at least one full AXI lite data word wide
    byte_lanes = tb.axil_master.read_if.byte_lanes
    counter_size = max(dut.STAT_COUNT_W.value // 8, byte_lanes)
    stat_inc_width = len(dut.s_axis_stat.tdata)
    stat_id_width = len(dut.s_axis_stat.tid)

    await tb.cycle_reset()

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await Timer(4000, 'ns')

    async def worker(source, queue, count=128):
        # Send `count` random increments through `source`, logging each one to
        # `queue` as (counter id, increment).
        for _ in range(count):
            inc = random.randrange(1, 2**stat_inc_width)
            num = random.randrange(0, 2**stat_id_width)

            await source.send(AxiStreamFrame([inc], tid=num))

            await queue.put((num, inc))

            # random gap so the workers interleave differently on each pass
            await Timer(random.randint(1, 1000), 'ns')

    workers = []
    queue = Queue()

    for k in range(16):
        workers.append(cocotb.start_soon(worker(tb.stat_source, queue, count=128)))

    while workers:
        await workers.pop(0).join()

    await Timer(1000, 'ns')

    # rebuild the expected counter values from the log of sent increments
    data_ref = [0]*2**stat_id_width

    while not queue.empty():
        num, inc = await queue.get()
        data_ref[num] += inc

    print(data_ref)

    data = await tb.axil_master.read_words(0, 2**stat_id_width, ws=counter_size)

    print(data)

    assert data == data_ref

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless iterator repeating the pattern 1, 1, 1, 0."""
    pattern = [1, 1, 1, 0]
    return itertools.cycle(pattern)
# Register the cocotb tests only when this module is loaded inside a simulator.
if cocotb.SIM_NAME:

    # directed test: every combination of idle insertion and backpressure
    for test_fn in (run_test_acc,):
        acc_factory = TestFactory(test_fn)
        acc_factory.add_option("idle_inserter", [None, cycle_pause])
        acc_factory.add_option("backpressure_inserter", [None, cycle_pause])
        acc_factory.generate_tests()

    # stress test: single run with default arguments
    stress_factory = TestFactory(run_stress_test)
    stress_factory.generate_tests()


# cocotb-test

# directory holding this file, and the rtl directory three levels above it
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(
    os.path.join(tests_dir, '..', '..', '..', 'rtl')
)
def process_f_files(files):
    """Expand file-list (*.f) entries and de-duplicate the result by base name.

    Entries whose name ends in ".f" (case-insensitive) are read as
    whitespace-separated lists of paths relative to the list file's own
    directory and expanded recursively.  When several paths share a base name,
    the one seen last wins but keeps the position of the first.
    """
    by_name = {}

    for entry in files:
        if not entry.lower().endswith('.f'):
            by_name[os.path.basename(entry)] = entry
            continue

        with open(entry, 'r') as handle:
            listed = handle.read().split()

        base_dir = os.path.dirname(entry)
        nested = process_f_files([os.path.join(base_dir, item) for item in listed])
        for path in nested:
            by_name[os.path.basename(path)] = path

    return list(by_name.values())
@pytest.mark.parametrize("stat_count_w", [32, 64])
def test_taxi_stats_counter(request, stat_count_w):
    """pytest entry point: build and run the cocotb testbench with Verilator.

    One simulation is run per `stat_count_w` value, each in its own build
    directory named after the pytest node.
    """
    dut = "taxi_stats_counter"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    verilog_sources = [
        os.path.join(tests_dir, f"{toplevel}.sv"),
        os.path.join(rtl_dir, "stats", f"{dut}.sv"),
        os.path.join(rtl_dir, "axis", "taxi_axis_if.sv"),
        os.path.join(rtl_dir, "axi", "taxi_axil_if.sv"),
    ]

    verilog_sources = process_f_files(verilog_sources)

    parameters = {}

    parameters['STAT_COUNT_W'] = stat_count_w
    parameters['PIPELINE'] = 2
    parameters['STAT_INC_W'] = 16
    parameters['STAT_ID_W'] = 8
    parameters['AXIL_DATA_W'] = 32

    # Address width = ID bits + log2(bytes per counter slot).  A slot is as
    # wide as the wider of one counter and one AXI lite data word, which is
    # what the DUT's address-width check expects; using the counter width
    # alone would be too narrow whenever STAT_COUNT_W < AXIL_DATA_W.
    slot_bits = max(parameters['STAT_COUNT_W'], parameters['AXIL_DATA_W'])
    parameters['AXIL_ADDR_W'] = parameters['STAT_ID_W'] + ((slot_bits+7)//8-1).bit_length()

    # parameters are also exported to the simulation as PARAM_* environment variables
    extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

    # pytest node names contain brackets; strip them for the directory name
    sim_build = os.path.join(tests_dir, "sim_build",
        request.node.name.replace('[', '-').replace(']', ''))

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,69 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Statistics counter testbench
*/
module test_taxi_stats_counter #
(
    /* verilator lint_off WIDTHTRUNC */
    // counter width (bits) and pipeline length, forwarded to the DUT
    parameter STAT_COUNT_W = 32,
    parameter PIPELINE = 2,
    // increment (tdata) and counter ID (tid) widths of the statistics stream
    parameter STAT_INC_W = 16,
    parameter STAT_ID_W = 8,
    // AXI lite data and address widths
    parameter AXIL_DATA_W = 32,
    // default: ID bits + log2(bytes per counter slot), where a slot is as wide
    // as the wider of one counter and one AXI lite data word (the minimum the
    // DUT accepts)
    parameter AXIL_ADDR_W = STAT_ID_W + $clog2(((AXIL_DATA_W > STAT_COUNT_W ? AXIL_DATA_W : STAT_COUNT_W)+7)/8)
    /* verilator lint_on WIDTHTRUNC */
)
();

// clk and rst are left undriven here
// NOTE(review): presumably driven by the cocotb testbench; confirm
logic clk;
logic rst;

// statistics increment stream: tdata carries the increment, tid the counter ID
taxi_axis_if #(
    .DATA_W(STAT_INC_W),
    .KEEP_EN(0),
    .KEEP_W(1),
    .ID_EN(1),
    .ID_W(STAT_ID_W)
) s_axis_stat();

// a single AXI lite interface instance serves both the write and read ports
taxi_axil_if #(
    .DATA_W(AXIL_DATA_W),
    .ADDR_W(AXIL_ADDR_W)
) s_axil();

taxi_stats_counter #(
    .STAT_COUNT_W(STAT_COUNT_W),
    .PIPELINE(PIPELINE)
)
uut (
    .clk(clk),
    .rst(rst),

    /*
     * Statistics increment input
     */
    .s_axis_stat(s_axis_stat),

    /*
     * AXI Lite register interface
     */
    .s_axil_wr(s_axil),
    .s_axil_rd(s_axil)
);

endmodule
`resetall