stats: Add strings collector

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-04-16 22:40:53 -07:00
parent e6cf1f5850
commit 01c0c6cdc6
4 changed files with 543 additions and 0 deletions

View File

@@ -0,0 +1,241 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * Statistics strings collector (full)
 *
 * Keeps one 128-bit (16-byte) string slot per statistics ID in a memory and
 * exposes the slots read-only over AXI lite.
 *
 * String data is taken from beats observed on s_axis_stat that have tuser
 * set (tvalid && tready && tuser).  Each such beat carries:
 *   tid          - slot to update
 *   tdata[2:0]   - index of the 2-byte lane to write within the slot
 *   tdata[9:4]   - 6-bit code for the lower byte of the lane
 *   tdata[15:10] - 6-bit code for the upper byte of the lane
 * A 6-bit code c is expanded to the byte {1'b0, c[5], ~c[5], c[4:0]}.
 * Beats without tuser never modify the memory.
 *
 * At start-up and after every reset the whole memory is zeroed, one slot per
 * clock cycle; string beats that arrive while this is in progress are dropped.
 *
 * AXI lite writes are accepted and acknowledged with an OKAY response, but
 * the write data is discarded.  AXI lite reads return one AXIL_DATA_W-bit
 * word: address bits from ID_SHIFT upward select the slot and, when the data
 * bus is narrower than 128 bits, address bits from WORD_SELECT_SHIFT upward
 * select the word within the slot.
 */
module taxi_stats_strings_full #
(
    // Pipeline length (number of read pipeline stages, must be at least 2)
    parameter PIPELINE = 2
)
(
    input  wire logic    clk,
    input  wire logic    rst,

    /*
     * Statistics increment input
     */
    taxi_axis_if.mon     s_axis_stat,

    /*
     * AXI Lite register interface
     */
    taxi_axil_if.wr_slv  s_axil_wr,
    taxi_axil_if.rd_slv  s_axil_rd
);

// localparam STAT_INC_W = s_axis_stat.DATA_W;
localparam STAT_ID_W = s_axis_stat.ID_W;
localparam AXIL_ADDR_W = s_axil_rd.ADDR_W;
localparam AXIL_DATA_W = s_axil_rd.DATA_W;

// number of address bits below the slot index
localparam ID_SHIFT = $clog2(((AXIL_DATA_W > 128 ? AXIL_DATA_W : 128)+7)/8);
// number of address bits below the word select (byte offset inside one AXI lite word)
localparam WORD_SELECT_SHIFT = $clog2(AXIL_DATA_W/8);
// number of address bits that pick an AXI lite word inside a 128-bit slot
localparam WORD_SELECT_W = 128 > AXIL_DATA_W ? $clog2((128+7)/8) - $clog2(AXIL_DATA_W/8) : 1;

// check configuration
if (AXIL_ADDR_W < STAT_ID_W+ID_SHIFT)
    $fatal(0, "Error: AXI lite address width too narrow (instance %m)");

if (PIPELINE < 2)
    $fatal(0, "Error: PIPELINE must be at least 2 (instance %m)");

// memory zeroing state: init_reg is set while the memory is being cleared,
// init_ptr_reg is the slot that is cleared in the current cycle
logic init_reg = 1'b1, init_next;
logic [STAT_ID_W-1:0] init_ptr_reg = 0, init_ptr_next;

// one bit per pipeline stage, set while an AXI lite read occupies that stage
logic [PIPELINE-1:0] op_axil_read_pipe_reg = 0, op_axil_read_pipe_next;

// word select of the read in flight, carried alongside op_axil_read_pipe_reg
logic [WORD_SELECT_W-1:0] axil_shift_pipeline_reg[PIPELINE], axil_shift_pipeline_next[PIPELINE];

logic s_axil_awready_reg = 0, s_axil_awready_next;
logic s_axil_wready_reg = 0, s_axil_wready_next;
logic s_axil_bvalid_reg = 0, s_axil_bvalid_next;
logic s_axil_arready_reg = 0, s_axil_arready_next;
logic [AXIL_DATA_W-1:0] s_axil_rdata_reg = 0, s_axil_rdata_next;
logic s_axil_rvalid_reg = 0, s_axil_rvalid_next;

// string storage, one 128-bit slot per statistics ID
(* ramstyle = "no_rw_check" *)
logic [127:0] mem[2**STAT_ID_W];

logic [STAT_ID_W-1:0] mem_rd_addr;
logic [STAT_ID_W-1:0] mem_wr_addr;
logic [127:0] mem_wr_data;
logic [15:0] mem_wr_strb;
logic mem_wr_en;

logic [127:0] mem_read_data_reg = 0;
logic [127:0] mem_read_data_pipeline_reg[PIPELINE-1:1];

assign s_axil_wr.awready = s_axil_awready_reg;
assign s_axil_wr.wready = s_axil_wready_reg;
assign s_axil_wr.bresp = 2'b00;
assign s_axil_wr.bvalid = s_axil_bvalid_reg;

assign s_axil_rd.arready = s_axil_arready_reg;
assign s_axil_rd.rdata = s_axil_rdata_reg;
assign s_axil_rd.rresp = 2'b00;
assign s_axil_rd.rvalid = s_axil_rvalid_reg;

// slot index and word select decoded from the AXI lite read address
wire [STAT_ID_W-1:0] s_axil_araddr_id = STAT_ID_W'(s_axil_rd.araddr >> ID_SHIFT);
wire [WORD_SELECT_W-1:0] s_axil_araddr_shift = WORD_SELECT_W'(s_axil_rd.araddr >> WORD_SELECT_SHIFT);

initial begin
    // break up loop to work around iteration termination
    for (integer i = 0; i < 2**STAT_ID_W; i = i + 2**(STAT_ID_W/2)) begin
        for (integer j = i; j < i + 2**(STAT_ID_W/2); j = j + 1) begin
            mem[j] = 0;
        end
    end

    for (integer i = 0; i < PIPELINE; i = i + 1) begin
        axil_shift_pipeline_reg[i] = 0;
    end
end

always_comb begin
    init_next = init_reg;
    init_ptr_next = init_ptr_reg;

    // advance the read pipeline by one stage; stage 0 is refilled below
    op_axil_read_pipe_next = PIPELINE'({op_axil_read_pipe_reg, 1'b0});

    axil_shift_pipeline_next[0] = 0;
    for (integer j = 1; j < PIPELINE; j = j + 1) begin
        axil_shift_pipeline_next[j] = axil_shift_pipeline_reg[j-1];
    end

    // ready signals are single-cycle pulses; valid signals hold until accepted
    s_axil_awready_next = 1'b0;
    s_axil_wready_next = 1'b0;
    s_axil_bvalid_next = s_axil_bvalid_reg && !s_axil_wr.bready;

    s_axil_arready_next = 1'b0;
    s_axil_rdata_next = s_axil_rdata_reg;
    s_axil_rvalid_next = s_axil_rvalid_reg && !s_axil_rd.rready;

    // the memory read port always follows the AXI lite read address
    mem_rd_addr = s_axil_araddr_id;

    // default write setup: decode the current stat beat into one 2-byte lane.
    // The expanded byte pair is replicated across all 8 lanes and the strobe
    // picks the lane given by tdata[2:0].
    mem_wr_addr = s_axis_stat.tid;
    mem_wr_data = {8{1'b0, s_axis_stat.tdata[15], ~s_axis_stat.tdata[15], s_axis_stat.tdata[14:10], 1'b0, s_axis_stat.tdata[9], ~s_axis_stat.tdata[9], s_axis_stat.tdata[8:4]}};
    mem_wr_strb = '0;
    mem_wr_strb[s_axis_stat.tdata[2:0]*2 +: 2] = 2'b11;
    mem_wr_en = 0;

    // discard writes
    if (s_axil_wr.awvalid && s_axil_wr.wvalid && (!s_axil_wr.bvalid || s_axil_wr.bready) && (!s_axil_wr.awready && !s_axil_wr.wready)) begin
        s_axil_awready_next = 1'b1;
        s_axil_wready_next = 1'b1;
        s_axil_bvalid_next = 1'b1;
    end

    if (init_reg) begin
        // zero strings; this takes priority over incoming string data
        init_ptr_next = init_ptr_reg + 1;

        mem_wr_addr = init_ptr_reg;
        mem_wr_data = '0;
        mem_wr_strb = '1;
        mem_wr_en = 1'b1;

        // the last slot is being cleared in this cycle
        if (&init_ptr_reg) begin
            init_next = 1'b0;
        end
    end else if (s_axis_stat.tvalid && s_axis_stat.tready && s_axis_stat.tuser) begin
        // store string data; address, data and strobe come from the defaults above
        mem_wr_en = 1'b1;
    end

    // pipeline stage 0 - accept request
    // only one read is in flight at a time: the pipeline must be empty and
    // the previous response must have been taken (or be taken this cycle)
    if (s_axil_rd.arvalid && (!s_axil_rd.rvalid || s_axil_rd.rready) && op_axil_read_pipe_reg == 0) begin
        // AXIL read
        op_axil_read_pipe_next[0] = 1'b1;

        s_axil_arready_next = 1'b1;

        axil_shift_pipeline_next[0] = s_axil_araddr_shift;
    end

    // read complete, perform operation
    if (op_axil_read_pipe_reg[PIPELINE-1]) begin
        // AXIL read
        s_axil_rvalid_next = 1'b1;

        if (128 > AXIL_DATA_W) begin
            // narrow bus: return the selected word of the slot
            s_axil_rdata_next = AXIL_DATA_W'(mem_read_data_pipeline_reg[PIPELINE-1] >> axil_shift_pipeline_reg[PIPELINE-1]*AXIL_DATA_W);
        end else begin
            // bus at least as wide as a slot: return the whole slot
            s_axil_rdata_next = AXIL_DATA_W'(mem_read_data_pipeline_reg[PIPELINE-1]);
        end
    end
end

always_ff @(posedge clk) begin
    init_reg <= init_next;
    init_ptr_reg <= init_ptr_next;

    op_axil_read_pipe_reg <= op_axil_read_pipe_next;

    s_axil_awready_reg <= s_axil_awready_next;
    s_axil_wready_reg <= s_axil_wready_next;
    s_axil_bvalid_reg <= s_axil_bvalid_next;
    s_axil_arready_reg <= s_axil_arready_next;
    s_axil_rdata_reg <= s_axil_rdata_next;
    s_axil_rvalid_reg <= s_axil_rvalid_next;

    for (integer i = 0; i < PIPELINE; i = i + 1) begin
        axil_shift_pipeline_reg[i] <= axil_shift_pipeline_next[i];
    end

    // byte-wise memory write
    if (mem_wr_en) begin
        for (integer i = 0; i < 16; i = i + 1) begin
            if (mem_wr_strb[i]) begin
                mem[mem_wr_addr][i*8 +: 8] <= mem_wr_data[i*8 +: 8];
            end
        end
    end

    // registered memory read followed by the read data pipeline
    mem_read_data_reg <= mem[mem_rd_addr];
    mem_read_data_pipeline_reg[1] <= mem_read_data_reg;
    for (integer i = 2; i < PIPELINE; i = i + 1) begin
        mem_read_data_pipeline_reg[i] <= mem_read_data_pipeline_reg[i-1];
    end

    // synchronous reset; restarts the memory zeroing sequence
    if (rst) begin
        init_reg <= 1'b1;
        init_ptr_reg <= 0;

        op_axil_read_pipe_reg <= 0;

        s_axil_awready_reg <= 1'b0;
        s_axil_wready_reg <= 1'b0;
        s_axil_bvalid_reg <= 1'b0;
        s_axil_arready_reg <= 1'b0;
        s_axil_rvalid_reg <= 1'b0;
    end
end

endmodule
`resetall

View File

@@ -0,0 +1,52 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
# cocotb simulation settings; SIM and WAVES can be overridden from the
# command line or the environment
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
# design under test; the test module and the HDL top level are both test_$(DUT)
DUT = taxi_stats_strings_full
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
# MODULE and TOPLEVEL mirror the COCOTB_* variables above
# (presumably for cocotb releases that still read the old names - confirm)
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += ../../../rtl/stats/$(DUT).sv
VERILOG_SOURCES += ../../../rtl/axis/taxi_axis_if.sv
VERILOG_SOURCES += ../../../rtl/axi/taxi_axil_if.sv
# handle file list files
# process_f_file:  read file list $1 (whitespace-separated entries) and prefix
#                  every entry with the directory of $1, then expand the result
#                  again so that nested .f files are followed
# process_f_files: expand every entry of $1 that ends in .f, keep all others
# uniq_base:       remove duplicates by base file name; when a base name occurs
#                  more than once, the last occurrence is the one that is kept
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
# every PARAM_<name> variable is exported to the environment and is also
# turned into a simulator parameter override for <name> further down
export PARAM_PIPELINE := 2
export PARAM_STAT_INC_W := 16
export PARAM_STAT_ID_W := 8
export PARAM_AXIL_DATA_W := 32
# address width = STAT_ID_W + 4, computed with python since make has no arithmetic
export PARAM_AXIL_ADDR_W := $(shell python -c "print($(PARAM_STAT_ID_W)+4)")
# simulator-specific options:
#   icarus:    -P <toplevel>.<name>=<value> for every PARAM_ variable, plus -fst
#   verilator: -G<name>=<value> for every PARAM_ variable; FST tracing is only
#              enabled when WAVES=1
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
# pull in the cocotb simulation makefile from the location reported by cocotb-config
include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import cocotb_test.simulator
import cocotb
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiLiteBus, AxiLiteMaster
from cocotbext.axi import AxiStreamBus, AxiStreamSource, AxiStreamFrame
class TB:
    """Testbench harness around the DUT.

    Starts the clock and attaches an AXI stream source to ``s_axis_stat`` and
    an AXI lite master to ``s_axil``.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.DEBUG)

        # clock with a period of 10 ns
        cocotb.start_soon(Clock(dut.clk, 10, units="ns").start())

        stat_bus = AxiStreamBus.from_entity(dut.s_axis_stat)
        self.stat_source = AxiStreamSource(stat_bus, dut.clk, dut.rst)

        axil_bus = AxiLiteBus.from_entity(dut.s_axil)
        self.axil_master = AxiLiteMaster(axil_bus, dut.clk, dut.rst)

    def set_idle_generator(self, generator=None):
        """Install a fresh pause generator on every channel driven by the testbench.

        ``generator`` is a callable returning a new iterator per channel;
        nothing is changed when it is falsy.
        """
        if not generator:
            return

        self.stat_source.set_pause_generator(generator())

        write_if = self.axil_master.write_if
        write_if.aw_channel.set_pause_generator(generator())
        write_if.w_channel.set_pause_generator(generator())

        read_if = self.axil_master.read_if
        read_if.ar_channel.set_pause_generator(generator())

    def set_backpressure_generator(self, generator=None):
        """Install a fresh pause generator on the AXI lite response channels (B and R).

        ``generator`` is a callable returning a new iterator per channel;
        nothing is changed when it is falsy.
        """
        if not generator:
            return

        self.axil_master.write_if.b_channel.set_pause_generator(generator())
        self.axil_master.read_if.r_channel.set_pause_generator(generator())

    async def cycle_reset(self):
        """Pulse ``rst`` high for two clock edges, with two edges of idle before and after."""
        clk = self.dut.clk
        rst = self.dut.rst

        rst.setimmediatevalue(0)
        for _ in range(2):
            await RisingEdge(clk)

        rst.value = 1
        for _ in range(2):
            await RisingEdge(clk)

        rst.value = 0
        for _ in range(2):
            await RisingEdge(clk)
async def run_test_strings(dut, idle_inserter=None, backpressure_inserter=None):
    """Write ten strings through the stat stream and read them back over AXI lite.

    For each ID 0..9 the 16-character string ``'BLK     STR_<n>   '`` is sent
    as eight beats (two 6-bit character codes plus a 3-bit lane index each,
    ``tuser=1``), interleaved with ``tuser=0`` beats carrying ``0xdead``.
    The slots are then read back and compared against ``BLK.STR_<n>``.
    """
    tb = TB(dut)

    await tb.cycle_reset()

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    # wait before sending any string data
    await Timer(4000, 'ns')

    for str_id in range(10):
        block_name = 'BLK'
        string_name = f'STR_{str_id}'
        text = f'{block_name:8}{string_name:8}'
        print(text)
        raw = text.encode('ascii')

        for lane in range(0, 8):
            # bits [2:0] hold the lane index, the two character codes follow from bit 4
            word = lane
            for pos in range(2):
                ch = raw[lane*2 + pos]
                # 6-bit code: low five bits of the character, bit 5 taken from bit 6
                code = ch & 0x1f
                if ch & 0x40:
                    code |= 0x20
                word |= code << (4 + 6*pos)

            await tb.stat_source.send(AxiStreamFrame([word], tid=str_id, tuser=1))
            # beat without tuser set
            await tb.stat_source.send(AxiStreamFrame([0xdead], tid=str_id, tuser=0))

    await Timer(12000, 'ns')

    data = await tb.axil_master.read_words(0, 10, ws=16)

    print(data)

    for idx, word in enumerate(data):
        slot = word.to_bytes(16, 'little')
        print(slot)
        # first and second 8-byte halves, whitespace stripped, joined with a dot
        name = (slot[0:8].strip() + b"." + slot[8:].strip()).decode('ascii')
        print(name)
        assert name == f'BLK.STR_{idx}'

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
def cycle_pause():
    """Return an endless iterator that repeats the pattern 1, 1, 1, 0."""
    pattern = [1, 1, 1, 0]
    return itertools.cycle(pattern)
# Only build the cocotb tests when cocotb.SIM_NAME is set.
if cocotb.SIM_NAME:
    for test in [run_test_strings]:
        # each option offers two choices: no generator, or cycle_pause
        factory = TestFactory(test)
        factory.add_option("idle_inserter", [None, cycle_pause])
        factory.add_option("backpressure_inserter", [None, cycle_pause])
        factory.generate_tests()
# cocotb-test
# directory holding this test, and the rtl directory three levels above it
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(
    os.path.join(tests_dir, os.pardir, os.pardir, os.pardir, 'rtl')
)
def process_f_files(files):
    """Expand ``.f`` file lists and drop duplicate base names.

    Every entry of ``files`` whose name ends in ``.f`` (case-insensitive) is
    read as a whitespace-separated list of paths relative to the list file's
    directory and expanded recursively; all other entries are used as given.
    When several paths share a base name, the one seen last wins, but it
    keeps the position of the first occurrence.

    Returns the resulting list of paths.
    """
    by_basename = {}

    for path in files:
        if path.lower().endswith('.f'):
            list_dir = os.path.dirname(path)
            with open(path, 'r') as list_file:
                entries = list_file.read().split()
            expanded = process_f_files([os.path.join(list_dir, entry) for entry in entries])
        else:
            expanded = [path]

        for item in expanded:
            by_basename[os.path.basename(item)] = item

    return list(by_basename.values())
def test_taxi_stats_strings_full(request):
    """pytest entry point: build and run the cocotb test with Verilator via cocotb-test."""
    dut = "taxi_stats_strings_full"
    # the test module and the HDL top level share this file's base name
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = module

    source_parts = [
        (tests_dir, f"{toplevel}.sv"),
        (rtl_dir, "stats", f"{dut}.sv"),
        (rtl_dir, "axis", "taxi_axis_if.sv"),
        (rtl_dir, "axi", "taxi_axil_if.sv"),
    ]
    verilog_sources = process_f_files([os.path.join(*parts) for parts in source_parts])

    stat_id_w = 8
    parameters = {
        'PIPELINE': 2,
        'STAT_INC_W': 16,
        'STAT_ID_W': stat_id_w,
        'AXIL_DATA_W': 32,
        'AXIL_ADDR_W': stat_id_w + 4,
    }

    # the same values are also passed through the environment as PARAM_<name>
    extra_env = {f'PARAM_{name}': str(value) for name, value in parameters.items()}

    # separate build directory per test node, with brackets removed from the name
    build_name = request.node.name.replace('[', '-').replace(']', '')
    sim_build = os.path.join(tests_dir, "sim_build", build_name)

    cocotb_test.simulator.run(
        simulator="verilator",
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )

View File

@@ -0,0 +1,69 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Statistics strings collector testbench
*/
// Testbench wrapper: instantiates taxi_stats_strings_full together with the
// interface instances it connects to. clk and rst are not driven in this
// module; the Python test accesses clk, rst, s_axis_stat and s_axil by name.
module test_taxi_stats_strings_full #
(
/* verilator lint_off WIDTHTRUNC */
// PIPELINE is forwarded to the DUT; the remaining parameters size the interfaces
parameter PIPELINE = 2,
parameter STAT_INC_W = 16,
parameter STAT_ID_W = 8,
parameter AXIL_DATA_W = 32,
parameter AXIL_ADDR_W = STAT_ID_W + 4
/* verilator lint_on WIDTHTRUNC */
)
();
logic clk;
logic rst;
// statistics stream: STAT_INC_W-bit data with a STAT_ID_W-bit ID
taxi_axis_if #(
.DATA_W(STAT_INC_W),
.KEEP_EN(0),
.KEEP_W(1),
.ID_EN(1),
.ID_W(STAT_ID_W)
) s_axis_stat();
// AXI lite bus; the same instance is connected to both the write and the read port of the DUT
taxi_axil_if #(
.DATA_W(AXIL_DATA_W),
.ADDR_W(AXIL_ADDR_W)
) s_axil();
taxi_stats_strings_full #(
.PIPELINE(PIPELINE)
)
uut (
.clk(clk),
.rst(rst),
/*
* Statistics increment input
*/
.s_axis_stat(s_axis_stat),
/*
* AXI Lite register interface
*/
.s_axil_wr(s_axil),
.s_axil_rd(s_axil)
);
// the DUT attaches through the .mon modport and only samples tready, so it is tied high here
assign s_axis_stat.tready = 1'b1;
endmodule
`resetall