axi: Add AXI RAM module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2025-02-27 00:27:11 -08:00
parent 0632b1982e
commit 55c097f47d
5 changed files with 672 additions and 0 deletions

View File

@@ -25,6 +25,7 @@ To facilitate the dual-license model, contributions to the project can only be a
* AXI
* SV interface for AXI
* Register slice
* Single port RAM
* AXI lite
* SV interface for AXI lite
* Register slice

327
rtl/axi/taxi_axi_ram.sv Normal file
View File

@@ -0,0 +1,327 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2018-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4 RAM
*/
module taxi_axi_ram #
(
// Width of address bus in bits
parameter ADDR_W = 16,
// Extra pipeline register on output
parameter logic PIPELINE_OUTPUT = 1'b0
)
(
input wire logic clk,
input wire logic rst,
/*
* AXI4 slave interface
*/
taxi_axi_if.wr_slv s_axi_wr,
taxi_axi_if.rd_slv s_axi_rd
);
// extract parameters
localparam DATA_W = s_axi_wr.DATA_W;
localparam STRB_W = s_axi_wr.STRB_W;
localparam WR_ID_W = s_axi_wr.ID_W;
localparam RD_ID_W = s_axi_rd.ID_W;
localparam VALID_ADDR_W = ADDR_W - $clog2(STRB_W);
localparam BYTE_LANES = STRB_W;
localparam BYTE_W = DATA_W/BYTE_LANES;
// check configuration
if (BYTE_W * STRB_W != DATA_W)
$fatal(0, "Error: AXI data width not evenly divisible (instance %m)");
if (2**$clog2(BYTE_LANES) != BYTE_LANES)
$fatal(0, "Error: AXI byte lane count must be even power of two (instance %m)");
if (s_axi_wr.DATA_W != s_axi_rd.DATA_W)
$fatal(0, "Error: AXI interface configuration mismatch (instance %m)");
if (s_axi_wr.ADDR_W < ADDR_W || s_axi_rd.ADDR_W < ADDR_W)
$fatal(0, "Error: AXI address width is insufficient (instance %m)");
localparam [0:0]
READ_STATE_IDLE = 1'd0,
READ_STATE_BURST = 1'd1;
logic [0:0] read_state_reg = READ_STATE_IDLE, read_state_next;
localparam [1:0]
WRITE_STATE_IDLE = 2'd0,
WRITE_STATE_BURST = 2'd1,
WRITE_STATE_RESP = 2'd2;
logic [1:0] write_state_reg = WRITE_STATE_IDLE, write_state_next;
logic mem_wr_en;
logic mem_rd_en;
logic [WR_ID_W-1:0] write_id_reg = '0, write_id_next;
logic [ADDR_W-1:0] write_addr_reg = '0, write_addr_next;
logic [7:0] write_count_reg = 8'd0, write_count_next;
logic [2:0] write_size_reg = 3'd0, write_size_next;
logic [1:0] write_burst_reg = 2'd0, write_burst_next;
logic [RD_ID_W-1:0] read_id_reg = '0, read_id_next;
logic [ADDR_W-1:0] read_addr_reg = '0, read_addr_next;
logic [7:0] read_count_reg = 8'd0, read_count_next;
logic [2:0] read_size_reg = 3'd0, read_size_next;
logic [1:0] read_burst_reg = 2'd0, read_burst_next;
logic s_axi_awready_reg = 1'b0, s_axi_awready_next;
logic s_axi_wready_reg = 1'b0, s_axi_wready_next;
logic [WR_ID_W-1:0] s_axi_bid_reg = '0, s_axi_bid_next;
logic s_axi_bvalid_reg = 1'b0, s_axi_bvalid_next;
logic s_axi_arready_reg = 1'b0, s_axi_arready_next;
logic [RD_ID_W-1:0] s_axi_rid_reg = '0, s_axi_rid_next;
logic [DATA_W-1:0] s_axi_rdata_reg = '0, s_axi_rdata_next;
logic s_axi_rlast_reg = 1'b0, s_axi_rlast_next;
logic s_axi_rvalid_reg = 1'b0, s_axi_rvalid_next;
logic [RD_ID_W-1:0] s_axi_rid_pipe_reg = '0;
logic [DATA_W-1:0] s_axi_rdata_pipe_reg = '0;
logic s_axi_rlast_pipe_reg = 1'b0;
logic s_axi_rvalid_pipe_reg = 1'b0;
// (* RAM_STYLE="BLOCK" *)
logic [DATA_W-1:0] mem[(2**VALID_ADDR_W)-1:0];
wire [VALID_ADDR_W-1:0] read_addr_valid = VALID_ADDR_W'(read_addr_reg >> (ADDR_W - VALID_ADDR_W));
wire [VALID_ADDR_W-1:0] write_addr_valid = VALID_ADDR_W'(write_addr_reg >> (ADDR_W - VALID_ADDR_W));
assign s_axi_wr.awready = s_axi_awready_reg;
assign s_axi_wr.wready = s_axi_wready_reg;
assign s_axi_wr.bid = s_axi_bid_reg;
assign s_axi_wr.bresp = 2'b00;
assign s_axi_wr.bvalid = s_axi_bvalid_reg;
assign s_axi_rd.arready = s_axi_arready_reg;
assign s_axi_rd.rid = PIPELINE_OUTPUT ? s_axi_rid_pipe_reg : s_axi_rid_reg;
assign s_axi_rd.rdata = PIPELINE_OUTPUT ? s_axi_rdata_pipe_reg : s_axi_rdata_reg;
assign s_axi_rd.rresp = 2'b00;
assign s_axi_rd.rlast = PIPELINE_OUTPUT ? s_axi_rlast_pipe_reg : s_axi_rlast_reg;
assign s_axi_rd.rvalid = PIPELINE_OUTPUT ? s_axi_rvalid_pipe_reg : s_axi_rvalid_reg;
initial begin
// two nested loops for smaller number of iterations per loop
// workaround for synthesizer complaints about large loop counts
for (integer i = 0; i < 2**VALID_ADDR_W; i = i + 2**(VALID_ADDR_W/2)) begin
for (integer j = i; j < i + 2**(VALID_ADDR_W/2); j = j + 1) begin
mem[j] = '0;
end
end
end
always_comb begin
write_state_next = WRITE_STATE_IDLE;
mem_wr_en = 1'b0;
write_id_next = write_id_reg;
write_addr_next = write_addr_reg;
write_count_next = write_count_reg;
write_size_next = write_size_reg;
write_burst_next = write_burst_reg;
s_axi_awready_next = 1'b0;
s_axi_wready_next = 1'b0;
s_axi_bid_next = s_axi_bid_reg;
s_axi_bvalid_next = s_axi_bvalid_reg && !s_axi_wr.bready;
case (write_state_reg)
WRITE_STATE_IDLE: begin
s_axi_awready_next = 1'b1;
if (s_axi_wr.awready && s_axi_wr.awvalid) begin
write_id_next = s_axi_wr.awid;
write_addr_next = ADDR_W'(s_axi_wr.awaddr);
write_count_next = s_axi_wr.awlen;
write_size_next = s_axi_wr.awsize <= 3'($clog2(STRB_W)) ? s_axi_wr.awsize : 3'($clog2(STRB_W));
write_burst_next = s_axi_wr.awburst;
s_axi_awready_next = 1'b0;
s_axi_wready_next = 1'b1;
write_state_next = WRITE_STATE_BURST;
end else begin
write_state_next = WRITE_STATE_IDLE;
end
end
WRITE_STATE_BURST: begin
s_axi_wready_next = 1'b1;
if (s_axi_wr.wready && s_axi_wr.wvalid) begin
mem_wr_en = 1'b1;
if (write_burst_reg != 2'b00) begin
write_addr_next = write_addr_reg + (1 << write_size_reg);
end
write_count_next = write_count_reg - 1;
if (write_count_reg > 0) begin
write_state_next = WRITE_STATE_BURST;
end else begin
s_axi_wready_next = 1'b0;
if (s_axi_wr.bready || !s_axi_wr.bvalid) begin
s_axi_bid_next = write_id_reg;
s_axi_bvalid_next = 1'b1;
s_axi_awready_next = 1'b1;
write_state_next = WRITE_STATE_IDLE;
end else begin
write_state_next = WRITE_STATE_RESP;
end
end
end else begin
write_state_next = WRITE_STATE_BURST;
end
end
WRITE_STATE_RESP: begin
if (s_axi_wr.bready || !s_axi_wr.bvalid) begin
s_axi_bid_next = write_id_reg;
s_axi_bvalid_next = 1'b1;
s_axi_awready_next = 1'b1;
write_state_next = WRITE_STATE_IDLE;
end else begin
write_state_next = WRITE_STATE_RESP;
end
end
default: begin
write_state_next = WRITE_STATE_IDLE;
end
endcase
end
always_ff @(posedge clk) begin
write_state_reg <= write_state_next;
write_id_reg <= write_id_next;
write_addr_reg <= write_addr_next;
write_count_reg <= write_count_next;
write_size_reg <= write_size_next;
write_burst_reg <= write_burst_next;
s_axi_awready_reg <= s_axi_awready_next;
s_axi_wready_reg <= s_axi_wready_next;
s_axi_bid_reg <= s_axi_bid_next;
s_axi_bvalid_reg <= s_axi_bvalid_next;
for (integer i = 0; i < BYTE_LANES; i = i + 1) begin
if (mem_wr_en & s_axi_wr.wstrb[i]) begin
mem[write_addr_valid][BYTE_W*i +: BYTE_W] <= s_axi_wr.wdata[BYTE_W*i +: BYTE_W];
end
end
if (rst) begin
write_state_reg <= WRITE_STATE_IDLE;
s_axi_awready_reg <= 1'b0;
s_axi_wready_reg <= 1'b0;
s_axi_bvalid_reg <= 1'b0;
end
end
always_comb begin
read_state_next = READ_STATE_IDLE;
mem_rd_en = 1'b0;
s_axi_rid_next = s_axi_rid_reg;
s_axi_rlast_next = s_axi_rlast_reg;
s_axi_rvalid_next = s_axi_rvalid_reg && !(s_axi_rd.rready || (PIPELINE_OUTPUT && !s_axi_rvalid_pipe_reg));
read_id_next = read_id_reg;
read_addr_next = read_addr_reg;
read_count_next = read_count_reg;
read_size_next = read_size_reg;
read_burst_next = read_burst_reg;
s_axi_arready_next = 1'b0;
case (read_state_reg)
READ_STATE_IDLE: begin
s_axi_arready_next = 1'b1;
if (s_axi_rd.arready && s_axi_rd.arvalid) begin
read_id_next = s_axi_rd.arid;
read_addr_next = ADDR_W'(s_axi_rd.araddr);
read_count_next = s_axi_rd.arlen;
read_size_next = s_axi_rd.arsize <= 3'($clog2(STRB_W)) ? s_axi_rd.arsize : 3'($clog2(STRB_W));
read_burst_next = s_axi_rd.arburst;
s_axi_arready_next = 1'b0;
read_state_next = READ_STATE_BURST;
end else begin
read_state_next = READ_STATE_IDLE;
end
end
READ_STATE_BURST: begin
if (s_axi_rd.rready || (PIPELINE_OUTPUT && !s_axi_rvalid_pipe_reg) || !s_axi_rvalid_reg) begin
mem_rd_en = 1'b1;
s_axi_rvalid_next = 1'b1;
s_axi_rid_next = read_id_reg;
s_axi_rlast_next = read_count_reg == 0;
if (read_burst_reg != 2'b00) begin
read_addr_next = read_addr_reg + (1 << read_size_reg);
end
read_count_next = read_count_reg - 1;
if (read_count_reg > 0) begin
read_state_next = READ_STATE_BURST;
end else begin
s_axi_arready_next = 1'b1;
read_state_next = READ_STATE_IDLE;
end
end else begin
read_state_next = READ_STATE_BURST;
end
end
endcase
end
always_ff @(posedge clk) begin
read_state_reg <= read_state_next;
read_id_reg <= read_id_next;
read_addr_reg <= read_addr_next;
read_count_reg <= read_count_next;
read_size_reg <= read_size_next;
read_burst_reg <= read_burst_next;
s_axi_arready_reg <= s_axi_arready_next;
s_axi_rid_reg <= s_axi_rid_next;
s_axi_rlast_reg <= s_axi_rlast_next;
s_axi_rvalid_reg <= s_axi_rvalid_next;
if (mem_rd_en) begin
s_axi_rdata_reg <= mem[read_addr_valid];
end
if (!s_axi_rvalid_pipe_reg || s_axi_rd.rready) begin
s_axi_rid_pipe_reg <= s_axi_rid_reg;
s_axi_rdata_pipe_reg <= s_axi_rdata_reg;
s_axi_rlast_pipe_reg <= s_axi_rlast_reg;
s_axi_rvalid_pipe_reg <= s_axi_rvalid_reg;
end
if (rst) begin
read_state_reg <= READ_STATE_IDLE;
s_axi_arready_reg <= 1'b0;
s_axi_rvalid_reg <= 1'b0;
s_axi_rvalid_pipe_reg <= 1'b0;
end
end
endmodule
`resetall

View File

@@ -0,0 +1,49 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
DUT = taxi_axi_ram
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += ../../../rtl/axi/$(DUT).sv
VERILOG_SOURCES += ../../../rtl/axi/taxi_axi_if.sv
# handle file list files
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
export PARAM_DATA_W := 32
export PARAM_ADDR_W := 16
export PARAM_PIPELINE_OUTPUT := 0
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1,240 @@
#!/usr/bin/env python
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import random
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiBus, AxiMaster
class TB(object):
def __init__(self, dut):
self.dut = dut
self.log = logging.getLogger("cocotb.tb")
self.log.setLevel(logging.DEBUG)
cocotb.start_soon(Clock(dut.clk, 10, units="ns").start())
self.axi_master = AxiMaster(AxiBus.from_entity(dut.s_axi), dut.clk, dut.rst)
def set_idle_generator(self, generator=None):
if generator:
self.axi_master.write_if.aw_channel.set_pause_generator(generator())
self.axi_master.write_if.w_channel.set_pause_generator(generator())
self.axi_master.read_if.ar_channel.set_pause_generator(generator())
def set_backpressure_generator(self, generator=None):
if generator:
self.axi_master.write_if.b_channel.set_pause_generator(generator())
self.axi_master.read_if.r_channel.set_pause_generator(generator())
async def cycle_reset(self):
self.dut.rst.setimmediatevalue(0)
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
self.dut.rst.value = 1
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
self.dut.rst.value = 0
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
async def run_test_write(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None):
tb = TB(dut)
byte_lanes = tb.axi_master.write_if.byte_lanes
max_burst_size = tb.axi_master.write_if.max_burst_size
if size is None:
size = max_burst_size
await tb.cycle_reset()
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
for length in list(range(1, byte_lanes*2))+[1024]:
for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)):
tb.log.info("length %d, offset %d, size %d", length, offset, size)
addr = offset+0x1000
test_data = bytearray([x % 256 for x in range(length)])
await tb.axi_master.write(addr-4, b'\xaa'*(length+8))
await tb.axi_master.write(addr, test_data, size=size)
data = await tb.axi_master.read(addr-1, length+2)
assert data.data == b'\xaa'+test_data+b'\xaa'
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_read(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None):
tb = TB(dut)
byte_lanes = tb.axi_master.write_if.byte_lanes
max_burst_size = tb.axi_master.write_if.max_burst_size
if size is None:
size = max_burst_size
await tb.cycle_reset()
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
for length in list(range(1, byte_lanes*2))+[1024]:
for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)):
tb.log.info("length %d, offset %d, size %d", length, offset, size)
addr = offset+0x1000
test_data = bytearray([x % 256 for x in range(length)])
await tb.axi_master.write(addr, test_data)
data = await tb.axi_master.read(addr, length, size=size)
assert data.data == test_data
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
await tb.cycle_reset()
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
async def worker(master, offset, aperture, count=16):
for k in range(count):
length = random.randint(1, min(512, aperture))
addr = offset+random.randint(0, aperture-length)
test_data = bytearray([x % 256 for x in range(length)])
await Timer(random.randint(1, 100), 'ns')
await master.write(addr, test_data)
await Timer(random.randint(1, 100), 'ns')
data = await master.read(addr, length)
assert data.data == test_data
workers = []
for k in range(16):
workers.append(cocotb.start_soon(worker(tb.axi_master, k*0x1000, 0x1000, count=16)))
while workers:
await workers.pop(0).join()
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
def cycle_pause():
return itertools.cycle([1, 1, 1, 0])
if cocotb.SIM_NAME:
data_width = len(cocotb.top.s_axi.wdata)
byte_lanes = data_width // 8
max_burst_size = (byte_lanes-1).bit_length()
for test in [run_test_write, run_test_read]:
factory = TestFactory(test)
factory.add_option("idle_inserter", [None, cycle_pause])
factory.add_option("backpressure_inserter", [None, cycle_pause])
factory.add_option("size", [None]+list(range(max_burst_size)))
factory.generate_tests()
factory = TestFactory(run_stress_test)
factory.add_option("idle_inserter", [None, cycle_pause])
factory.add_option("backpressure_inserter", [None, cycle_pause])
factory.generate_tests()
# cocotb-test
tests_dir = os.path.abspath(os.path.dirname(__file__))
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', '..', 'rtl'))
def process_f_files(files):
lst = {}
for f in files:
if f[-2:].lower() == '.f':
with open(f, 'r') as fp:
l = fp.read().split()
for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]):
lst[os.path.basename(f)] = f
else:
lst[os.path.basename(f)] = f
return list(lst.values())
@pytest.mark.parametrize("data_w", [8, 16, 32])
def test_taxi_axi_ram(request, data_w):
dut = "taxi_axi_ram"
module = os.path.splitext(os.path.basename(__file__))[0]
toplevel = module
verilog_sources = [
os.path.join(tests_dir, f"{toplevel}.sv"),
os.path.join(rtl_dir, "axi", f"{dut}.sv"),
os.path.join(rtl_dir, "axi", "taxi_axi_if.sv"),
]
verilog_sources = process_f_files(verilog_sources)
parameters = {}
parameters['DATA_W'] = data_w
parameters['ADDR_W'] = 16
parameters['ID_W'] = 8
parameters['PIPELINE_OUTPUT'] = 0
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
sim_build = os.path.join(tests_dir, "sim_build",
request.node.name.replace('[', '-').replace(']', ''))
cocotb_test.simulator.run(
simulator="verilator",
python_search=[tests_dir],
verilog_sources=verilog_sources,
toplevel=toplevel,
module=module,
parameters=parameters,
sim_build=sim_build,
extra_env=extra_env,
)

View File

@@ -0,0 +1,55 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* AXI4 RAM testbench
*/
module test_taxi_axi_ram #
(
/* verilator lint_off WIDTHTRUNC */
parameter DATA_W = 32,
parameter ADDR_W = 16,
parameter ID_W = 8,
parameter PIPELINE_OUTPUT = 0
/* verilator lint_on WIDTHTRUNC */
)
();
logic clk;
logic rst;
taxi_axi_if #(
.DATA_W(DATA_W),
.ADDR_W(ADDR_W+16),
.ID_W(ID_W)
) s_axi(), m_axi();
taxi_axi_ram #(
.ADDR_W(ADDR_W),
.PIPELINE_OUTPUT(PIPELINE_OUTPUT)
)
uut (
.clk(clk),
.rst(rst),
/*
* AXI4-Lite slave interface
*/
.s_axi_wr(s_axi),
.s_axi_rd(s_axi)
);
endmodule
`resetall