pcie: Add UltraScale PCIe AXI Lite Master module and testbench

Signed-off-by: Alex Forencich <alex@alexforencich.com>
Alex Forencich
2025-08-25 22:39:28 -07:00
parent 06e6f3e1b4
commit b5c9c02b03
4 changed files with 1442 additions and 0 deletions

@@ -0,0 +1,821 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2018-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* UltraScale PCIe AXI Lite Master
*/
module taxi_pcie_us_axil_master
(
input wire logic clk,
input wire logic rst,
/*
* UltraScale PCIe interface
*/
taxi_axis_if.snk s_axis_cq,
taxi_axis_if.src m_axis_cc,
/*
* AXI Lite Master output
*/
taxi_axil_if.wr_mst m_axil_wr,
taxi_axil_if.rd_mst m_axil_rd,
/*
* Configuration
*/
input wire logic [15:0] completer_id,
input wire logic completer_id_enable,
/*
* Status
*/
output wire logic stat_err_cor,
output wire logic stat_err_uncor
);
// extract parameters
localparam AXIS_PCIE_DATA_W = s_axis_cq.DATA_W;
localparam AXIS_PCIE_KEEP_W = s_axis_cq.KEEP_W;
localparam AXIS_PCIE_CQ_USER_W = s_axis_cq.USER_W;
localparam AXIS_PCIE_CC_USER_W = m_axis_cc.USER_W;
localparam AXI_DATA_W = m_axil_wr.DATA_W;
localparam AXI_ADDR_W = m_axil_wr.ADDR_W;
localparam AXI_STRB_W = m_axil_wr.STRB_W;
// check configuration
if (AXIS_PCIE_DATA_W != 64 && AXIS_PCIE_DATA_W != 128 && AXIS_PCIE_DATA_W != 256 && AXIS_PCIE_DATA_W != 512)
$fatal(0, "Error: PCIe interface width must be 64, 128, 256, or 512 (instance %m)");
if (AXIS_PCIE_KEEP_W * 32 != AXIS_PCIE_DATA_W)
$fatal(0, "Error: PCIe interface requires dword (32-bit) granularity (instance %m)");
if (AXIS_PCIE_DATA_W == 512) begin
if (AXIS_PCIE_CQ_USER_W != 183)
$fatal(0, "Error: PCIe CQ tuser width must be 183 (instance %m)");
if (AXIS_PCIE_CC_USER_W != 81)
$fatal(0, "Error: PCIe CC tuser width must be 81 (instance %m)");
end else begin
if (AXIS_PCIE_CQ_USER_W != 85 && AXIS_PCIE_CQ_USER_W != 88)
$fatal(0, "Error: PCIe CQ tuser width must be 85 or 88 (instance %m)");
if (AXIS_PCIE_CC_USER_W != 33)
$fatal(0, "Error: PCIe CC tuser width must be 33 (instance %m)");
end
if (AXI_DATA_W != 32)
$fatal(0, "Error: AXI interface width must be 32 (instance %m)");
if (AXI_STRB_W * 8 != AXI_DATA_W)
$fatal(0, "Error: AXI interface requires byte (8-bit) granularity (instance %m)");
localparam [3:0]
REQ_MEM_READ = 4'b0000,
REQ_MEM_WRITE = 4'b0001,
REQ_IO_READ = 4'b0010,
REQ_IO_WRITE = 4'b0011,
REQ_MEM_FETCH_ADD = 4'b0100,
REQ_MEM_SWAP = 4'b0101,
REQ_MEM_CAS = 4'b0110,
REQ_MEM_READ_LOCKED = 4'b0111,
REQ_CFG_READ_0 = 4'b1000,
REQ_CFG_READ_1 = 4'b1001,
REQ_CFG_WRITE_0 = 4'b1010,
REQ_CFG_WRITE_1 = 4'b1011,
REQ_MSG = 4'b1100,
REQ_MSG_VENDOR = 4'b1101,
REQ_MSG_ATS = 4'b1110;
localparam [2:0]
CPL_STATUS_SC = 3'b000, // successful completion
CPL_STATUS_UR = 3'b001, // unsupported request
CPL_STATUS_CRS = 3'b010, // configuration request retry status
CPL_STATUS_CA = 3'b100; // completer abort
localparam [2:0]
STATE_IDLE = 3'd0,
STATE_HEADER = 3'd1,
STATE_READ = 3'd2,
STATE_WRITE_1 = 3'd3,
STATE_WRITE_2 = 3'd4,
STATE_WAIT_END = 3'd5,
STATE_CPL_1 = 3'd6,
STATE_CPL_2 = 3'd7;
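// Flow: STATE_IDLE captures the CQ request descriptor (STATE_HEADER takes the
// second descriptor beat on the 64-bit interface), STATE_READ and
// STATE_WRITE_1/STATE_WRITE_2 perform the AXI lite transaction, STATE_WAIT_END
// drains oversized or unsupported requests, and STATE_CPL_1/STATE_CPL_2
// transmit the completion on the CC interface.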
wire [63:0] req_tlp_hdr_addr;
wire [10:0] req_tlp_hdr_length;
wire [3:0] req_tlp_hdr_type;
wire [15:0] req_tlp_hdr_requester_id;
wire [7:0] req_tlp_hdr_tag;
wire [2:0] req_tlp_hdr_tc;
wire [2:0] req_tlp_hdr_attr;
wire [3:0] req_tlp_hdr_first_be;
wire [3:0] req_tlp_hdr_last_be;
wire [31:0] req_tlp_data;
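// extract CQ request descriptor fields; on the 64-bit interface the descriptor
// spans two beats, and on interfaces narrower than 256 bits the first payload
// dword arrives on a beat after the descriptor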
if (AXIS_PCIE_DATA_W == 64) begin
assign req_tlp_hdr_addr = {s_axis_cq.tdata[63:2], 2'b00};
assign req_tlp_hdr_length = s_axis_cq.tdata[10:0];
assign req_tlp_hdr_type = s_axis_cq.tdata[14:11];
assign req_tlp_hdr_requester_id = s_axis_cq.tdata[31:16];
assign req_tlp_hdr_tag = s_axis_cq.tdata[39:32];
assign req_tlp_hdr_tc = s_axis_cq.tdata[59:57];
assign req_tlp_hdr_attr = s_axis_cq.tdata[62:60];
end else begin
assign req_tlp_hdr_addr = {s_axis_cq.tdata[63:2], 2'b00};
assign req_tlp_hdr_length = s_axis_cq.tdata[74:64];
assign req_tlp_hdr_type = s_axis_cq.tdata[78:75];
assign req_tlp_hdr_requester_id = s_axis_cq.tdata[95:80];
assign req_tlp_hdr_tag = s_axis_cq.tdata[103:96];
assign req_tlp_hdr_tc = s_axis_cq.tdata[123:121];
assign req_tlp_hdr_attr = s_axis_cq.tdata[126:124];
end
if (AXIS_PCIE_DATA_W == 512) begin
assign req_tlp_hdr_first_be = s_axis_cq.tuser[3:0];
assign req_tlp_hdr_last_be = s_axis_cq.tuser[11:8];
end else begin
assign req_tlp_hdr_first_be = s_axis_cq.tuser[3:0];
assign req_tlp_hdr_last_be = s_axis_cq.tuser[7:4];
end
if (AXIS_PCIE_DATA_W >= 256) begin
assign req_tlp_data = s_axis_cq.tdata[159:128];
end else begin
assign req_tlp_data = s_axis_cq.tdata[31:0];
end
logic [95:0] cpl_tlp_hdr;
logic [32:0] cpl_tuser_1;
logic [80:0] cpl_tuser_2;
logic [2:0] state_reg = STATE_IDLE, state_next;
logic [10:0] dword_count_reg = '0, dword_count_next;
logic [3:0] type_reg = '0, type_next;
logic [2:0] status_reg = '0, status_next;
logic [15:0] requester_id_reg = '0, requester_id_next;
logic [7:0] tag_reg = '0, tag_next;
logic [2:0] tc_reg = '0, tc_next;
logic [2:0] attr_reg = '0, attr_next;
logic [3:0] first_be_reg = '0, first_be_next;
logic [3:0] last_be_reg = '0, last_be_next;
logic cpl_data_reg = 1'b0, cpl_data_next;
logic s_axis_cq_tready_reg = 1'b0, s_axis_cq_tready_next;
logic [AXI_ADDR_W-1:0] m_axil_addr_reg = '0, m_axil_addr_next;
logic m_axil_awvalid_reg = 1'b0, m_axil_awvalid_next;
logic [AXI_DATA_W-1:0] m_axil_wdata_reg = '0, m_axil_wdata_next;
logic [AXI_STRB_W-1:0] m_axil_wstrb_reg = '0, m_axil_wstrb_next;
logic m_axil_wvalid_reg = 1'b0, m_axil_wvalid_next;
logic m_axil_bready_reg = 1'b0, m_axil_bready_next;
logic m_axil_arvalid_reg = 1'b0, m_axil_arvalid_next;
logic m_axil_rready_reg = 1'b0, m_axil_rready_next;
logic stat_err_cor_reg = 1'b0, stat_err_cor_next;
logic stat_err_uncor_reg = 1'b0, stat_err_uncor_next;
// internal datapath
logic [AXIS_PCIE_DATA_W-1:0] m_axis_cc_tdata_int;
logic [AXIS_PCIE_KEEP_W-1:0] m_axis_cc_tkeep_int;
logic m_axis_cc_tvalid_int;
logic m_axis_cc_tready_int_reg = 1'b0;
logic m_axis_cc_tlast_int;
logic [AXIS_PCIE_CC_USER_W-1:0] m_axis_cc_tuser_int;
wire m_axis_cc_tready_int_early;
assign s_axis_cq.tready = s_axis_cq_tready_reg;
assign m_axil_wr.awaddr = m_axil_addr_reg;
assign m_axil_wr.awprot = 3'b010;
assign m_axil_wr.awvalid = m_axil_awvalid_reg;
assign m_axil_wr.wdata = m_axil_wdata_reg;
assign m_axil_wr.wstrb = m_axil_wstrb_reg;
assign m_axil_wr.wvalid = m_axil_wvalid_reg;
assign m_axil_wr.bready = m_axil_bready_reg;
assign m_axil_rd.araddr = m_axil_addr_reg;
assign m_axil_rd.arprot = 3'b010;
assign m_axil_rd.arvalid = m_axil_arvalid_reg;
assign m_axil_rd.rready = m_axil_rready_reg;
assign stat_err_cor = stat_err_cor_reg;
assign stat_err_uncor = stat_err_uncor_reg;
always_comb begin
state_next = STATE_IDLE;
s_axis_cq_tready_next = 1'b0;
dword_count_next = dword_count_reg;
type_next = type_reg;
status_next = status_reg;
requester_id_next = requester_id_reg;
tag_next = tag_reg;
tc_next = tc_reg;
attr_next = attr_reg;
first_be_next = first_be_reg;
last_be_next = last_be_reg;
cpl_data_next = cpl_data_reg;
m_axis_cc_tdata_int = '0;
m_axis_cc_tkeep_int = '0;
m_axis_cc_tvalid_int = 1'b0;
m_axis_cc_tlast_int = 1'b0;
m_axis_cc_tuser_int = '0;
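// assemble the 3-DW UltraScale CC descriptor; the lower address and byte count
// fields are derived from the first byte enable of the (single dword) request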
cpl_tlp_hdr = '0; // zero the reserved descriptor bits not assigned below
casez (first_be_reg)
4'b0000: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b00}; // lower address
4'bzzz1: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b00}; // lower address
4'bzz10: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b01}; // lower address
4'bz100: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b10}; // lower address
4'b1000: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b11}; // lower address
endcase
cpl_tlp_hdr[9:8] = 2'b00; // AT
casez (first_be_reg)
4'b0000: cpl_tlp_hdr[28:16] = 13'd1; // Byte count
4'b0001: cpl_tlp_hdr[28:16] = 13'd1; // Byte count
4'b0010: cpl_tlp_hdr[28:16] = 13'd1; // Byte count
4'b0100: cpl_tlp_hdr[28:16] = 13'd1; // Byte count
4'b1000: cpl_tlp_hdr[28:16] = 13'd1; // Byte count
4'b0011: cpl_tlp_hdr[28:16] = 13'd2; // Byte count
4'b0110: cpl_tlp_hdr[28:16] = 13'd2; // Byte count
4'b1100: cpl_tlp_hdr[28:16] = 13'd2; // Byte count
4'b01z1: cpl_tlp_hdr[28:16] = 13'd3; // Byte count
4'b1z10: cpl_tlp_hdr[28:16] = 13'd3; // Byte count
4'b1zz1: cpl_tlp_hdr[28:16] = 13'd4; // Byte count
endcase
cpl_tlp_hdr[42:32] = cpl_data_reg ? 11'd1 : 11'd0; // DWORD count
cpl_tlp_hdr[45:43] = status_reg;
cpl_tlp_hdr[63:48] = requester_id_reg;
cpl_tlp_hdr[71:64] = tag_reg;
cpl_tlp_hdr[87:72] = completer_id;
cpl_tlp_hdr[88] = completer_id_enable;
cpl_tlp_hdr[91:89] = tc_reg;
cpl_tlp_hdr[94:92] = attr_reg;
cpl_tlp_hdr[95] = 1'b0; // force ECRC
// CC tuser sideband for 64- to 256-bit interface widths
cpl_tuser_1[0] = 1'b0; // discontinue
cpl_tuser_1[32:1] = 32'd0; // parity
// CC tuser sideband for 512-bit interface width
cpl_tuser_2[1:0] = 2'b01; // is_sop
cpl_tuser_2[3:2] = 2'd0; // is_sop0_ptr
cpl_tuser_2[5:4] = 2'd0; // is_sop1_ptr
cpl_tuser_2[7:6] = 2'b01; // is_eop
cpl_tuser_2[11:8] = cpl_data_reg ? 4'd3 : 4'd2; // is_eop0_ptr
cpl_tuser_2[15:12] = 4'd0; // is_eop1_ptr
cpl_tuser_2[16] = 1'b0; // discontinue
cpl_tuser_2[80:17] = 64'd0; // parity
if (AXIS_PCIE_DATA_W == 64) begin
m_axis_cc_tdata_int = AXIS_PCIE_DATA_W'(cpl_tlp_hdr[63:0]);
m_axis_cc_tkeep_int = AXIS_PCIE_KEEP_W'(2'b11);
m_axis_cc_tlast_int = 1'b0;
end else begin
m_axis_cc_tdata_int = AXIS_PCIE_DATA_W'({m_axil_rd.rdata, cpl_tlp_hdr});
m_axis_cc_tkeep_int = AXIS_PCIE_KEEP_W'({cpl_data_reg, 3'b111});
m_axis_cc_tlast_int = 1'b1;
end
if (AXIS_PCIE_DATA_W == 512) begin
m_axis_cc_tuser_int = AXIS_PCIE_CC_USER_W'(cpl_tuser_2);
end else begin
m_axis_cc_tuser_int = AXIS_PCIE_CC_USER_W'(cpl_tuser_1);
end
m_axil_addr_next = m_axil_addr_reg;
m_axil_awvalid_next = m_axil_awvalid_reg && !m_axil_wr.awready;
m_axil_wdata_next = m_axil_wdata_reg;
m_axil_wstrb_next = m_axil_wstrb_reg;
m_axil_wvalid_next = m_axil_wvalid_reg && !m_axil_wr.wready;
m_axil_bready_next = 1'b0;
m_axil_arvalid_next = m_axil_arvalid_reg && !m_axil_rd.arready;
m_axil_rready_next = 1'b0;
stat_err_cor_next = 1'b0;
stat_err_uncor_next = 1'b0;
case (state_reg)
STATE_IDLE: begin
// idle state, wait for completer request
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
if (s_axis_cq.tready && s_axis_cq.tvalid) begin
// header fields
m_axil_addr_next = req_tlp_hdr_addr;
if (AXIS_PCIE_DATA_W > 64) begin
dword_count_next = req_tlp_hdr_length;
type_next = req_tlp_hdr_type;
requester_id_next = req_tlp_hdr_requester_id;
tag_next = req_tlp_hdr_tag;
tc_next = req_tlp_hdr_tc;
attr_next = req_tlp_hdr_attr;
// data
if (AXIS_PCIE_DATA_W >= 256) begin
m_axil_wdata_next = req_tlp_data;
end
end
first_be_next = req_tlp_hdr_first_be;
last_be_next = req_tlp_hdr_last_be;
m_axil_wstrb_next = first_be_next;
cpl_data_next = 1'b1;
status_next = CPL_STATUS_SC; // successful completion
if (AXIS_PCIE_DATA_W == 64) begin
if (s_axis_cq.tlast) begin
// truncated packet
// report uncorrectable error
stat_err_uncor_next = 1'b1;
state_next = STATE_IDLE;
end else begin
state_next = STATE_HEADER;
end
end else begin
if (type_next == REQ_MEM_READ || type_next == REQ_IO_READ) begin
// read request
cpl_data_next = 1'b1;
if (s_axis_cq.tlast && dword_count_next == 11'd1) begin
m_axil_arvalid_next = 1'b1;
m_axil_rready_next = m_axis_cc_tready_int_early;
s_axis_cq_tready_next = 1'b0;
state_next = STATE_READ;
end else begin
// bad length
cpl_data_next = 1'b0;
status_next = CPL_STATUS_CA; // completer abort
// report correctable error
stat_err_cor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = 1'b0;
state_next = STATE_CPL_1;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end
end else if (type_next == REQ_MEM_WRITE || type_next == REQ_IO_WRITE) begin
// write request
cpl_data_next = 1'b0;
if (AXIS_PCIE_DATA_W >= 256 && s_axis_cq.tlast && dword_count_next == 11'd1) begin
m_axil_awvalid_next = 1'b1;
m_axil_wvalid_next = 1'b1;
m_axil_bready_next = 1'b1;
s_axis_cq_tready_next = 1'b0;
state_next = STATE_WRITE_2;
end else if (AXIS_PCIE_DATA_W < 256 && dword_count_next == 11'd1) begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WRITE_1;
end else begin
// bad length
status_next = CPL_STATUS_CA; // completer abort
if (type_next == REQ_MEM_WRITE) begin
// memory write - posted, no completion
// report uncorrectable error
stat_err_uncor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end else begin
// IO write - non-posted, send completion
// report correctable error
stat_err_cor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = 1'b0;
state_next = STATE_CPL_1;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end
end
end else begin
// other request
cpl_data_next = 1'b0;
status_next = CPL_STATUS_UR; // unsupported request
if (type_next == REQ_MEM_WRITE || (type_next & 4'b1100) == 4'b1100) begin
// memory write or message - posted, no completion
// report uncorrectable error
stat_err_uncor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end else begin
// other non-posted request, send UR completion
// report correctable error
stat_err_cor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = 1'b0;
state_next = STATE_CPL_1;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end
end
end
end else begin
state_next = STATE_IDLE;
end
end
STATE_HEADER: begin
// header state, handle header (64-bit interface only)
if (AXIS_PCIE_DATA_W == 64) begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
// header fields
dword_count_next = req_tlp_hdr_length;
type_next = req_tlp_hdr_type;
requester_id_next = req_tlp_hdr_requester_id;
tag_next = req_tlp_hdr_tag;
tc_next = req_tlp_hdr_tc;
attr_next = req_tlp_hdr_attr;
// data
m_axil_wstrb_next = first_be_reg;
if (s_axis_cq.tready && s_axis_cq.tvalid) begin
if (type_next == REQ_MEM_READ || type_next == REQ_IO_READ) begin
// read request
cpl_data_next = 1'b1;
if (s_axis_cq.tlast && dword_count_next == 11'd1) begin
m_axil_arvalid_next = 1'b1;
m_axil_rready_next = m_axis_cc_tready_int_early;
s_axis_cq_tready_next = 1'b0;
state_next = STATE_READ;
end else begin
// bad length
cpl_data_next = 1'b0;
status_next = CPL_STATUS_CA; // completer abort
// report correctable error
stat_err_cor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = 1'b0;
state_next = STATE_CPL_1;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end
end else if (type_next == REQ_MEM_WRITE || type_next == REQ_IO_WRITE) begin
// write request
cpl_data_next = 1'b0;
if (dword_count_next == 11'd1) begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WRITE_1;
end else begin
// bad length
status_next = CPL_STATUS_CA; // completer abort
if (type_next == REQ_MEM_WRITE) begin
// memory write - posted, no completion
// report uncorrectable error
stat_err_uncor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end else begin
// IO write - non-posted, send completion
// report correctable error
stat_err_cor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = 1'b0;
state_next = STATE_CPL_1;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end
end
end else begin
// other request
cpl_data_next = 1'b0;
status_next = CPL_STATUS_UR; // unsupported request
if (type_next == REQ_MEM_WRITE || (type_next & 4'b1100) == 4'b1100) begin
// memory write or message - posted, no completion
// report uncorrectable error
stat_err_uncor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end else begin
// other non-posted request, send UR completion
// report correctable error
stat_err_cor_next = 1'b1;
if (s_axis_cq.tlast) begin
s_axis_cq_tready_next = 1'b0;
state_next = STATE_CPL_1;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end
end
end else begin
state_next = STATE_HEADER;
end
end
end
STATE_READ: begin
// read state, wait for read response
m_axil_rready_next = m_axis_cc_tready_int_early;
if (m_axil_rd.rready && m_axil_rd.rvalid) begin
// send completion
m_axis_cc_tvalid_int = 1'b1;
m_axil_rready_next = 1'b0;
if (AXIS_PCIE_DATA_W == 64) begin
cpl_data_next = 1'b1;
state_next = STATE_CPL_2;
end else begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end
end else begin
state_next = STATE_READ;
end
end
STATE_WRITE_1: begin
// write 1 state, store write data and initiate write
s_axis_cq_tready_next = 1'b1;
// data
m_axil_wdata_next = req_tlp_data;
if (s_axis_cq.tready && s_axis_cq.tvalid) begin
if (s_axis_cq.tlast) begin
m_axil_awvalid_next = 1'b1;
m_axil_wvalid_next = 1'b1;
m_axil_bready_next = m_axis_cc_tready_int_early;
s_axis_cq_tready_next = 1'b0;
state_next = STATE_WRITE_2;
end else begin
s_axis_cq_tready_next = 1'b1;
state_next = STATE_WAIT_END;
end
end else begin
state_next = STATE_WRITE_1;
end
end
STATE_WRITE_2: begin
// write 2 state, handle write response
m_axil_bready_next = m_axis_cc_tready_int_early;
if (m_axil_wr.bready && m_axil_wr.bvalid) begin
m_axil_bready_next = 1'b0;
if (type_reg == REQ_MEM_WRITE) begin
// memory write - posted, no completion
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
// IO write - non-posted, send completion
m_axis_cc_tvalid_int = 1'b1;
if (AXIS_PCIE_DATA_W == 64) begin
state_next = STATE_CPL_2;
end else begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end
end
end else begin
state_next = STATE_WRITE_2;
end
end
STATE_WAIT_END: begin
// wait end state, wait for end of completer request
s_axis_cq_tready_next = 1'b1;
if (s_axis_cq.tready && s_axis_cq.tvalid) begin
if (s_axis_cq.tlast) begin
// end of request; send completion if request was non-posted
if (type_reg == REQ_MEM_WRITE || (type_reg & 4'b1100) == 4'b1100) begin
// memory write or message - posted, no completion
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
// non-posted request, send completion
m_axis_cc_tvalid_int = 1'b1;
if (m_axis_cc_tready_int_reg) begin
if (AXIS_PCIE_DATA_W == 64) begin
state_next = STATE_CPL_2;
end else begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end
end else begin
state_next = STATE_CPL_1;
end
end
end else begin
state_next = STATE_WAIT_END;
end
end else begin
state_next = STATE_WAIT_END;
end
end
STATE_CPL_1: begin
// send completion
m_axis_cc_tvalid_int = 1'b1;
if (m_axis_cc_tready_int_reg) begin
if (AXIS_PCIE_DATA_W == 64) begin
cpl_data_next = 1'b0;
state_next = STATE_CPL_2;
end else begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end
end else begin
state_next = STATE_CPL_1;
end
end
STATE_CPL_2: begin
// send rest of completion (64-bit interface only)
if (AXIS_PCIE_DATA_W == 64) begin
m_axis_cc_tvalid_int = 1'b1;
m_axis_cc_tdata_int = AXIS_PCIE_DATA_W'({m_axil_rd.rdata, cpl_tlp_hdr[95:64]});
m_axis_cc_tkeep_int = AXIS_PCIE_KEEP_W'({cpl_data_reg, 1'b1});
m_axis_cc_tlast_int = 1'b1;
if (m_axis_cc_tready_int_reg) begin
s_axis_cq_tready_next = m_axis_cc_tready_int_early;
state_next = STATE_IDLE;
end else begin
state_next = STATE_CPL_2;
end
end
end
endcase
end
always_ff @(posedge clk) begin
state_reg <= state_next;
dword_count_reg <= dword_count_next;
type_reg <= type_next;
tag_reg <= tag_next;
status_reg <= status_next;
requester_id_reg <= requester_id_next;
tc_reg <= tc_next;
attr_reg <= attr_next;
first_be_reg <= first_be_next;
last_be_reg <= last_be_next;
cpl_data_reg <= cpl_data_next;
s_axis_cq_tready_reg <= s_axis_cq_tready_next;
m_axil_addr_reg <= m_axil_addr_next;
m_axil_awvalid_reg <= m_axil_awvalid_next;
m_axil_wdata_reg <= m_axil_wdata_next;
m_axil_wstrb_reg <= m_axil_wstrb_next;
m_axil_wvalid_reg <= m_axil_wvalid_next;
m_axil_bready_reg <= m_axil_bready_next;
m_axil_arvalid_reg <= m_axil_arvalid_next;
m_axil_rready_reg <= m_axil_rready_next;
stat_err_cor_reg <= stat_err_cor_next;
stat_err_uncor_reg <= stat_err_uncor_next;
if (rst) begin
state_reg <= STATE_IDLE;
s_axis_cq_tready_reg <= 1'b0;
m_axil_awvalid_reg <= 1'b0;
m_axil_wvalid_reg <= 1'b0;
m_axil_bready_reg <= 1'b0;
m_axil_arvalid_reg <= 1'b0;
m_axil_rready_reg <= 1'b0;
stat_err_cor_reg <= 1'b0;
stat_err_uncor_reg <= 1'b0;
end
end
// output datapath logic
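// two-register skid buffer on the CC output: all outputs are registered so
// tready does not propagate combinatorially, while sustaining full throughput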
reg [AXIS_PCIE_DATA_W-1:0] m_axis_cc_tdata_reg = '0;
reg [AXIS_PCIE_KEEP_W-1:0] m_axis_cc_tkeep_reg = '0;
reg m_axis_cc_tvalid_reg = 1'b0, m_axis_cc_tvalid_next;
reg m_axis_cc_tlast_reg = 1'b0;
reg [AXIS_PCIE_CC_USER_W-1:0] m_axis_cc_tuser_reg = '0;
reg [AXIS_PCIE_DATA_W-1:0] temp_m_axis_cc_tdata_reg = '0;
reg [AXIS_PCIE_KEEP_W-1:0] temp_m_axis_cc_tkeep_reg = '0;
reg temp_m_axis_cc_tvalid_reg = 1'b0, temp_m_axis_cc_tvalid_next;
reg temp_m_axis_cc_tlast_reg = 1'b0;
reg [AXIS_PCIE_CC_USER_W-1:0] temp_m_axis_cc_tuser_reg = '0;
// datapath control
reg store_axis_int_to_output;
reg store_axis_int_to_temp;
reg store_axis_temp_to_output;
assign m_axis_cc.tdata = m_axis_cc_tdata_reg;
assign m_axis_cc.tkeep = m_axis_cc_tkeep_reg;
assign m_axis_cc.tstrb = m_axis_cc.tkeep;
assign m_axis_cc.tvalid = m_axis_cc_tvalid_reg;
assign m_axis_cc.tlast = m_axis_cc_tlast_reg;
assign m_axis_cc.tuser = m_axis_cc_tuser_reg;
assign m_axis_cc.tid = '0;
assign m_axis_cc.tdest = '0;
// enable ready input next cycle if output is ready or if both output registers are empty
assign m_axis_cc_tready_int_early = m_axis_cc.tready || (!temp_m_axis_cc_tvalid_reg && !m_axis_cc_tvalid_reg);
always_comb begin
// transfer sink ready state to source
m_axis_cc_tvalid_next = m_axis_cc_tvalid_reg;
temp_m_axis_cc_tvalid_next = temp_m_axis_cc_tvalid_reg;
store_axis_int_to_output = 1'b0;
store_axis_int_to_temp = 1'b0;
store_axis_temp_to_output = 1'b0;
if (m_axis_cc_tready_int_reg) begin
// input is ready
if (m_axis_cc.tready || !m_axis_cc_tvalid_reg) begin
// output is ready or currently not valid, transfer data to output
m_axis_cc_tvalid_next = m_axis_cc_tvalid_int;
store_axis_int_to_output = 1'b1;
end else begin
// output is not ready, store input in temp
temp_m_axis_cc_tvalid_next = m_axis_cc_tvalid_int;
store_axis_int_to_temp = 1'b1;
end
end else if (m_axis_cc.tready) begin
// input is not ready, but output is ready
m_axis_cc_tvalid_next = temp_m_axis_cc_tvalid_reg;
temp_m_axis_cc_tvalid_next = 1'b0;
store_axis_temp_to_output = 1'b1;
end
end
always_ff @(posedge clk) begin
m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next;
m_axis_cc_tready_int_reg <= m_axis_cc_tready_int_early;
temp_m_axis_cc_tvalid_reg <= temp_m_axis_cc_tvalid_next;
// datapath
if (store_axis_int_to_output) begin
m_axis_cc_tdata_reg <= m_axis_cc_tdata_int;
m_axis_cc_tkeep_reg <= m_axis_cc_tkeep_int;
m_axis_cc_tlast_reg <= m_axis_cc_tlast_int;
m_axis_cc_tuser_reg <= m_axis_cc_tuser_int;
end else if (store_axis_temp_to_output) begin
m_axis_cc_tdata_reg <= temp_m_axis_cc_tdata_reg;
m_axis_cc_tkeep_reg <= temp_m_axis_cc_tkeep_reg;
m_axis_cc_tlast_reg <= temp_m_axis_cc_tlast_reg;
m_axis_cc_tuser_reg <= temp_m_axis_cc_tuser_reg;
end
if (store_axis_int_to_temp) begin
temp_m_axis_cc_tdata_reg <= m_axis_cc_tdata_int;
temp_m_axis_cc_tkeep_reg <= m_axis_cc_tkeep_int;
temp_m_axis_cc_tlast_reg <= m_axis_cc_tlast_int;
temp_m_axis_cc_tuser_reg <= m_axis_cc_tuser_int;
end
if (rst) begin
m_axis_cc_tvalid_reg <= 1'b0;
m_axis_cc_tready_int_reg <= 1'b0;
temp_m_axis_cc_tvalid_reg <= 1'b0;
end
end
endmodule
`resetall

@@ -0,0 +1,56 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2021-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src
DUT = taxi_pcie_us_axil_master
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv
VERILOG_SOURCES += $(TAXI_SRC_DIR)/axis/rtl/taxi_axis_if.sv
VERILOG_SOURCES += $(TAXI_SRC_DIR)/axi/rtl/taxi_axil_if.sv
# handle file list files
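# expand any .f file lists into their constituent source files and drop
# duplicate entries (matched by basename)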
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
export PARAM_AXIS_PCIE_DATA_W := 64
export PARAM_AXIS_PCIE_CQ_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),88,183)
export PARAM_AXIS_PCIE_CC_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),33,81)
export PARAM_AXIL_DATA_W := 32
export PARAM_AXIL_ADDR_W := 64
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
include $(shell cocotb-config --makefiles)/Makefile.sim

@@ -0,0 +1,466 @@
#!/usr/bin/env python
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import itertools
import logging
import os
import re
from contextlib import contextmanager
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.triggers import RisingEdge, FallingEdge, Timer
from cocotb.regression import TestFactory
from cocotbext.axi import AxiStreamBus
from cocotbext.pcie.core import RootComplex
from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice
from cocotbext.axi import AxiLiteBus, AxiLiteRam
@contextmanager
def assert_raises(exc_type, pattern=None):
try:
yield
except exc_type as e:
if pattern:
assert re.match(pattern, str(e)), \
"Correct exception type caught, but message did not match pattern"
pass
else:
raise AssertionError("{} was not raised".format(exc_type.__name__))
class TB(object):
def __init__(self, dut):
self.dut = dut
self.log = logging.getLogger("cocotb.tb")
self.log.setLevel(logging.DEBUG)
# PCIe
self.rc = RootComplex()
self.dev = UltraScalePlusPcieDevice(
# configuration options
pcie_generation=3,
# pcie_link_width=2,
# user_clk_frequency=250e6,
alignment="dword",
cq_straddle=False,
cc_straddle=False,
rq_straddle=False,
rc_straddle=False,
rc_4tlp_straddle=False,
pf_count=1,
max_payload_size=1024,
enable_client_tag=True,
enable_extended_tag=True,
enable_parity=False,
enable_rx_msg_interface=False,
enable_sriov=False,
enable_extended_configuration=False,
pf0_msi_enable=True,
pf0_msi_count=32,
pf1_msi_enable=False,
pf1_msi_count=1,
pf2_msi_enable=False,
pf2_msi_count=1,
pf3_msi_enable=False,
pf3_msi_count=1,
pf0_msix_enable=False,
pf0_msix_table_size=0,
pf0_msix_table_bir=0,
pf0_msix_table_offset=0x00000000,
pf0_msix_pba_bir=0,
pf0_msix_pba_offset=0x00000000,
pf1_msix_enable=False,
pf1_msix_table_size=0,
pf1_msix_table_bir=0,
pf1_msix_table_offset=0x00000000,
pf1_msix_pba_bir=0,
pf1_msix_pba_offset=0x00000000,
pf2_msix_enable=False,
pf2_msix_table_size=0,
pf2_msix_table_bir=0,
pf2_msix_table_offset=0x00000000,
pf2_msix_pba_bir=0,
pf2_msix_pba_offset=0x00000000,
pf3_msix_enable=False,
pf3_msix_table_size=0,
pf3_msix_table_bir=0,
pf3_msix_table_offset=0x00000000,
pf3_msix_pba_bir=0,
pf3_msix_pba_offset=0x00000000,
# signals
user_clk=dut.clk,
user_reset=dut.rst,
cq_bus=AxiStreamBus.from_entity(dut.s_axis_cq),
cc_bus=AxiStreamBus.from_entity(dut.m_axis_cc)
)
self.dev.log.setLevel(logging.DEBUG)
self.dev.functions[0].configure_bar(0, 16*1024*1024)
self.dev.functions[0].configure_bar(1, 16*1024, io=True)
self.rc.make_port().connect(self.dev)
# AXI
self.axil_ram = AxiLiteRam(AxiLiteBus.from_entity(dut.m_axil), dut.clk, dut.rst, size=2**16)
dut.completer_id.setimmediatevalue(0)
dut.completer_id_enable.setimmediatevalue(0)
# monitor error outputs
self.stat_err_cor_asserted = False
self.stat_err_uncor_asserted = False
cocotb.start_soon(self._run_monitor_stat_err_cor())
cocotb.start_soon(self._run_monitor_stat_err_uncor())
def set_idle_generator(self, generator=None):
if generator:
self.dev.cq_source.set_pause_generator(generator())
self.axil_ram.write_if.b_channel.set_pause_generator(generator())
self.axil_ram.read_if.r_channel.set_pause_generator(generator())
def set_backpressure_generator(self, generator=None):
if generator:
self.dev.cc_sink.set_pause_generator(generator())
self.axil_ram.write_if.aw_channel.set_pause_generator(generator())
self.axil_ram.write_if.w_channel.set_pause_generator(generator())
self.axil_ram.read_if.ar_channel.set_pause_generator(generator())
async def _run_monitor_stat_err_cor(self):
while True:
await RisingEdge(self.dut.stat_err_cor)
self.log.info("stat_err_cor (correctable error) was asserted")
self.stat_err_cor_asserted = True
async def _run_monitor_stat_err_uncor(self):
while True:
await RisingEdge(self.dut.stat_err_uncor)
self.log.info("stat_err_uncor (uncorrectable error) was asserted")
self.stat_err_uncor_asserted = True
async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
await FallingEdge(dut.rst)
await Timer(100, 'ns')
await tb.rc.enumerate()
dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
await dev.enable_device()
dev_bar0 = dev.bar_window[0]
dev_bar1 = dev.bar_window[1]
for length in range(0, 5):
for pcie_offset in range(4-length+1):
tb.log.info("length %d, pcie_offset %d", length, pcie_offset)
pcie_addr = pcie_offset+0x1000
test_data = bytearray([x % 256 for x in range(length)])
tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256))
await dev_bar0.write(pcie_addr, test_data)
await Timer(100, 'ns')
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48))
assert tb.axil_ram.read(pcie_addr-1, len(test_data)+2) == b'\x55'+test_data+b'\x55'
assert not tb.stat_err_cor_asserted
assert not tb.stat_err_uncor_asserted
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
await FallingEdge(dut.rst)
await Timer(100, 'ns')
await tb.rc.enumerate()
dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
await dev.enable_device()
dev_bar0 = dev.bar_window[0]
dev_bar1 = dev.bar_window[1]
for length in range(0, 5):
for pcie_offset in range(4-length+1):
tb.log.info("length %d, pcie_offset %d", length, pcie_offset)
pcie_addr = pcie_offset+0x1000
test_data = bytearray([x % 256 for x in range(length)])
tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256))
tb.axil_ram.write(pcie_addr, test_data)
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48))
val = await dev_bar0.read(pcie_addr, len(test_data), timeout=1000, timeout_unit='ns')
tb.log.debug("read data: %s", val)
assert val == test_data
assert not tb.stat_err_cor_asserted
assert not tb.stat_err_uncor_asserted
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_io_write(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
await FallingEdge(dut.rst)
await Timer(100, 'ns')
await tb.rc.enumerate()
dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
await dev.enable_device()
dev_bar0 = dev.bar_window[0]
dev_bar1 = dev.bar_window[1]
for length in range(1, 5):
for pcie_offset in range(4-length+1):
tb.log.info("length %d, pcie_offset %d", length, pcie_offset)
pcie_addr = pcie_offset+0x1000
test_data = bytearray([x % 256 for x in range(length)])
tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256))
await dev_bar1.write(pcie_addr, test_data, timeout=1000, timeout_unit='ns')
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI "))
assert tb.axil_ram.read(pcie_addr-1, len(test_data)+2) == b'\x55'+test_data+b'\x55'
assert not tb.stat_err_cor_asserted
assert not tb.stat_err_uncor_asserted
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_io_read(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
await FallingEdge(dut.rst)
await Timer(100, 'ns')
await tb.rc.enumerate()
dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
await dev.enable_device()
dev_bar0 = dev.bar_window[0]
dev_bar1 = dev.bar_window[1]
for length in range(1, 5):
for pcie_offset in range(4-length+1):
tb.log.info("length %d, pcie_offset %d", length, pcie_offset)
pcie_addr = pcie_offset+0x1000
test_data = bytearray([x % 256 for x in range(length)])
tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256))
tb.axil_ram.write(pcie_addr, test_data)
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI "))
val = await dev_bar1.read(pcie_addr, len(test_data), timeout=1000, timeout_unit='ns')
tb.log.debug("read data: %s", val)
assert val == test_data
assert not tb.stat_err_cor_asserted
assert not tb.stat_err_uncor_asserted
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
async def run_test_bad_ops(dut, idle_inserter=None, backpressure_inserter=None):
tb = TB(dut)
tb.set_idle_generator(idle_inserter)
tb.set_backpressure_generator(backpressure_inserter)
await FallingEdge(dut.rst)
await Timer(100, 'ns')
await tb.rc.enumerate()
dev = tb.rc.find_device(tb.dev.functions[0].pcie_id)
await dev.enable_device()
dev_bar0 = dev.bar_window[0]
dev_bar1 = dev.bar_window[1]
tb.log.info("Test bad write")
length = 32
pcie_addr = 0x1000
test_data = bytearray([x % 256 for x in range(length)])
tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256))
await dev_bar0.write(pcie_addr, test_data)
await Timer(100, 'ns')
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI "))
assert tb.axil_ram.read(pcie_addr-1, len(test_data)+2) == b'\x55'*(len(test_data)+2)
assert not tb.stat_err_cor_asserted
assert tb.stat_err_uncor_asserted
tb.stat_err_cor_asserted = False
tb.stat_err_uncor_asserted = False
tb.log.info("Test bad read")
length = 32
pcie_addr = 0x1000
test_data = bytearray([x % 256 for x in range(length)])
tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256))
tb.axil_ram.write(pcie_addr, test_data)
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI "))
with assert_raises(Exception, "Unsuccessful completion"):
val = await dev_bar0.read(pcie_addr, len(test_data), timeout=1000, timeout_unit='ns')
assert tb.stat_err_cor_asserted
assert not tb.stat_err_uncor_asserted
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
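# pause generator pattern: stalls the attached stream or AXI channel for three
# out of every four cycles to exercise backpressure and idle insertion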
def cycle_pause():
return itertools.cycle([1, 1, 1, 0])
if cocotb.SIM_NAME:
for test in [
run_test_write,
run_test_read,
run_test_io_write,
run_test_io_read,
run_test_bad_ops
]:
factory = TestFactory(test)
factory.add_option("idle_inserter", [None, cycle_pause])
factory.add_option("backpressure_inserter", [None, cycle_pause])
factory.generate_tests()
# cocotb-test
tests_dir = os.path.abspath(os.path.dirname(__file__))
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
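# expand .f file lists referenced in verilog_sources into individual files,
# de-duplicating by basename (mirrors the .f handling in the Makefile)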
def process_f_files(files):
lst = {}
for f in files:
if f[-2:].lower() == '.f':
with open(f, 'r') as fp:
l = fp.read().split()
for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]):
lst[os.path.basename(f)] = f
else:
lst[os.path.basename(f)] = f
return list(lst.values())
@pytest.mark.parametrize("axis_pcie_data_w", [64, 128, 256, 512])
def test_taxi_pcie_us_axil_master(request, axis_pcie_data_w):
dut = "taxi_pcie_us_axil_master"
module = os.path.splitext(os.path.basename(__file__))[0]
toplevel = module
verilog_sources = [
os.path.join(tests_dir, f"{toplevel}.sv"),
os.path.join(rtl_dir, f"{dut}.sv"),
os.path.join(taxi_src_dir, "axis", "rtl", "taxi_axis_if.sv"),
os.path.join(taxi_src_dir, "axi", "rtl", "taxi_axil_if.sv"),
]
verilog_sources = process_f_files(verilog_sources)
parameters = {}
parameters['AXIS_PCIE_DATA_W'] = axis_pcie_data_w
parameters['AXIS_PCIE_CQ_USER_W'] = 88 if parameters['AXIS_PCIE_DATA_W'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_W'] = 33 if parameters['AXIS_PCIE_DATA_W'] < 512 else 81
parameters['AXIL_DATA_W'] = 32
parameters['AXIL_ADDR_W'] = 64
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
extra_env['COCOTB_RESOLVE_X'] = 'RANDOM'
sim_build = os.path.join(tests_dir, "sim_build",
request.node.name.replace('[', '-').replace(']', ''))
cocotb_test.simulator.run(
simulator="verilator",
python_search=[tests_dir],
verilog_sources=verilog_sources,
toplevel=toplevel,
module=module,
parameters=parameters,
sim_build=sim_build,
extra_env=extra_env,
)

@@ -0,0 +1,99 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* UltraScale PCIe AXI Lite Master testbench
*/
module test_taxi_pcie_us_axil_master #
(
/* verilator lint_off WIDTHTRUNC */
parameter AXIS_PCIE_DATA_W = 64,
parameter AXIS_PCIE_CQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 85 : 183,
parameter AXIS_PCIE_CC_USER_W = AXIS_PCIE_DATA_W < 512 ? 33 : 81,
parameter AXIL_DATA_W = 32,
parameter AXIL_ADDR_W = 64
/* verilator lint_on WIDTHTRUNC */
)
();
localparam AXIS_PCIE_KEEP_W = (AXIS_PCIE_DATA_W/32);
logic clk;
logic rst;
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.USER_EN(1),
.USER_W(AXIS_PCIE_CQ_USER_W)
) s_axis_cq();
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.USER_EN(1),
.USER_W(AXIS_PCIE_CC_USER_W)
) m_axis_cc();
taxi_axil_if #(
.DATA_W(AXIL_DATA_W),
.ADDR_W(AXIL_ADDR_W),
.AWUSER_EN(1'b0),
.WUSER_EN(1'b0),
.BUSER_EN(1'b0),
.ARUSER_EN(1'b0),
.RUSER_EN(1'b0)
) m_axil();
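// a single AXI lite interface instance is connected to both the write and
// read master ports of the DUT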
logic [15:0] completer_id;
logic completer_id_enable;
logic stat_err_cor;
logic stat_err_uncor;
taxi_pcie_us_axil_master
uut (
.clk(clk),
.rst(rst),
/*
* UltraScale PCIe interface
*/
.s_axis_cq(s_axis_cq),
.m_axis_cc(m_axis_cc),
/*
* AXI Lite Master output
*/
.m_axil_wr(m_axil),
.m_axil_rd(m_axil),
/*
* Configuration
*/
.completer_id(completer_id),
.completer_id_enable(completer_id_enable),
/*
* Status
*/
.stat_err_cor(stat_err_cor),
.stat_err_uncor(stat_err_uncor)
);
endmodule
`resetall