From b5c9c02b03ae4a4ce49e734f1e1d9ff684f479f0 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 25 Aug 2025 22:39:28 -0700 Subject: [PATCH] pcie: Add UltraScale PCIe AXI Lite Master module and testbench Signed-off-by: Alex Forencich --- src/pcie/rtl/taxi_pcie_us_axil_master.sv | 821 ++++++++++++++++++ src/pcie/tb/taxi_pcie_us_axil_master/Makefile | 56 ++ .../test_taxi_pcie_us_axil_master.py | 466 ++++++++++ .../test_taxi_pcie_us_axil_master.sv | 99 +++ 4 files changed, 1442 insertions(+) create mode 100644 src/pcie/rtl/taxi_pcie_us_axil_master.sv create mode 100644 src/pcie/tb/taxi_pcie_us_axil_master/Makefile create mode 100644 src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.py create mode 100644 src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.sv diff --git a/src/pcie/rtl/taxi_pcie_us_axil_master.sv b/src/pcie/rtl/taxi_pcie_us_axil_master.sv new file mode 100644 index 0000000..10845c1 --- /dev/null +++ b/src/pcie/rtl/taxi_pcie_us_axil_master.sv @@ -0,0 +1,821 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2018-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * UltraScale PCIe AXI Lite Master + */ +module taxi_pcie_us_axil_master +( + input wire logic clk, + input wire logic rst, + + /* + * UltraScale PCIe interface + */ + taxi_axis_if.snk s_axis_cq, + taxi_axis_if.src m_axis_cc, + + /* + * AXI Lite Master output + */ + taxi_axil_if.wr_mst m_axil_wr, + taxi_axil_if.rd_mst m_axil_rd, + + /* + * Configuration + */ + input wire logic [15:0] completer_id, + input wire logic completer_id_enable, + + /* + * Status + */ + output wire logic stat_err_cor, + output wire logic stat_err_uncor +); + +// extract parameters +localparam AXIS_PCIE_DATA_W = s_axis_cq.DATA_W; +localparam AXIS_PCIE_KEEP_W = s_axis_cq.KEEP_W; +localparam AXIS_PCIE_CQ_USER_W = s_axis_cq.USER_W; +localparam AXIS_PCIE_CC_USER_W = m_axis_cc.USER_W; +localparam 
AXI_DATA_W = m_axil_wr.DATA_W; +localparam AXI_ADDR_W = m_axil_wr.ADDR_W; +localparam AXI_STRB_W = m_axil_wr.STRB_W; + +// check configuration +if (AXIS_PCIE_DATA_W != 64 && AXIS_PCIE_DATA_W != 128 && AXIS_PCIE_DATA_W != 256 && AXIS_PCIE_DATA_W != 512) + $fatal(0, "Error: PCIe interface width must be 64, 128, 256, or 512 (instance %m)"); + +if (AXIS_PCIE_KEEP_W * 32 != AXIS_PCIE_DATA_W) + $fatal(0, "Error: PCIe interface requires dword (32-bit) granularity (instance %m)"); + +if (AXIS_PCIE_DATA_W == 512) begin + if (AXIS_PCIE_CQ_USER_W != 183) + $fatal(0, "Error: PCIe CQ tuser width must be 183 (instance %m)"); + + if (AXIS_PCIE_CC_USER_W != 81) + $fatal(0, "Error: PCIe CC tuser width must be 81 (instance %m)"); +end else begin + if (AXIS_PCIE_CQ_USER_W != 85 && AXIS_PCIE_CQ_USER_W != 88) + $fatal(0, "Error: PCIe CQ tuser width must be 85 or 88 (instance %m)"); + + if (AXIS_PCIE_CC_USER_W != 33) + $fatal(0, "Error: PCIe CC tuser width must be 33 (instance %m)"); +end + +if (AXI_DATA_W != 32) + $fatal(0, "Error: AXI interface width must be 32 (instance %m)"); + +if (AXI_STRB_W * 8 != AXI_DATA_W) + $fatal(0, "Error: AXI interface requires byte (8-bit) granularity (instance %m)"); + +localparam [3:0] + REQ_MEM_READ = 4'b0000, + REQ_MEM_WRITE = 4'b0001, + REQ_IO_READ = 4'b0010, + REQ_IO_WRITE = 4'b0011, + REQ_MEM_FETCH_ADD = 4'b0100, + REQ_MEM_SWAP = 4'b0101, + REQ_MEM_CAS = 4'b0110, + REQ_MEM_READ_LOCKED = 4'b0111, + REQ_CFG_READ_0 = 4'b1000, + REQ_CFG_READ_1 = 4'b1001, + REQ_CFG_WRITE_0 = 4'b1010, + REQ_CFG_WRITE_1 = 4'b1011, + REQ_MSG = 4'b1100, + REQ_MSG_VENDOR = 4'b1101, + REQ_MSG_ATS = 4'b1110; + +localparam [2:0] + CPL_STATUS_SC = 3'b000, // successful completion + CPL_STATUS_UR = 3'b001, // unsupported request + CPL_STATUS_CRS = 3'b010, // configuration request retry status + CPL_STATUS_CA = 3'b100; // completer abort + +localparam [2:0] + STATE_IDLE = 3'd0, + STATE_HEADER = 3'd1, + STATE_READ = 3'd2, + STATE_WRITE_1 = 3'd3, + STATE_WRITE_2 = 3'd4, + 
STATE_WAIT_END = 3'd5, + STATE_CPL_1 = 3'd6, + STATE_CPL_2 = 3'd7; + +wire [63:0] req_tlp_hdr_addr; +wire [10:0] req_tlp_hdr_length; +wire [3:0] req_tlp_hdr_type; +wire [15:0] req_tlp_hdr_requester_id; +wire [7:0] req_tlp_hdr_tag; +wire [2:0] req_tlp_hdr_tc; +wire [2:0] req_tlp_hdr_attr; +wire [3:0] req_tlp_hdr_first_be; +wire [3:0] req_tlp_hdr_last_be; +wire [31:0] req_tlp_data; + +if (AXIS_PCIE_DATA_W == 64) begin + assign req_tlp_hdr_addr = {s_axis_cq.tdata[63:2], 2'b00}; + assign req_tlp_hdr_length = s_axis_cq.tdata[10:0]; + assign req_tlp_hdr_type = s_axis_cq.tdata[14:11]; + assign req_tlp_hdr_requester_id = s_axis_cq.tdata[31:16]; + assign req_tlp_hdr_tag = s_axis_cq.tdata[39:32]; + assign req_tlp_hdr_tc = s_axis_cq.tdata[59:57]; + assign req_tlp_hdr_attr = s_axis_cq.tdata[62:60]; +end else begin + assign req_tlp_hdr_addr = {s_axis_cq.tdata[63:2], 2'b00}; + assign req_tlp_hdr_length = s_axis_cq.tdata[74:64]; + assign req_tlp_hdr_type = s_axis_cq.tdata[78:75]; + assign req_tlp_hdr_requester_id = s_axis_cq.tdata[95:80]; + assign req_tlp_hdr_tag = s_axis_cq.tdata[103:96]; + assign req_tlp_hdr_tc = s_axis_cq.tdata[123:121]; + assign req_tlp_hdr_attr = s_axis_cq.tdata[126:124]; +end + +if (AXIS_PCIE_DATA_W == 512) begin + assign req_tlp_hdr_first_be = s_axis_cq.tuser[3:0]; + assign req_tlp_hdr_last_be = s_axis_cq.tuser[11:8]; +end else begin + assign req_tlp_hdr_first_be = s_axis_cq.tuser[3:0]; + assign req_tlp_hdr_last_be = s_axis_cq.tuser[7:4]; +end + +if (AXIS_PCIE_DATA_W >= 256) begin + assign req_tlp_data = s_axis_cq.tdata[159:128]; +end else begin + assign req_tlp_data = s_axis_cq.tdata[31:0]; +end + +logic [95:0] cpl_tlp_hdr; +logic [32:0] cpl_tuser_1; +logic [80:0] cpl_tuser_2; + +logic [2:0] state_reg = STATE_IDLE, state_next; + +logic [10:0] dword_count_reg = '0, dword_count_next; +logic [3:0] type_reg = '0, type_next; +logic [2:0] status_reg = '0, status_next; +logic [15:0] requester_id_reg = '0, requester_id_next; +logic [7:0] tag_reg = '0, tag_next; 
+logic [2:0] tc_reg = '0, tc_next; +logic [2:0] attr_reg = '0, attr_next; +logic [3:0] first_be_reg = '0, first_be_next; +logic [3:0] last_be_reg = '0, last_be_next; +logic cpl_data_reg = 1'b0, cpl_data_next; + +logic s_axis_cq_tready_reg = 1'b0, s_axis_cq_tready_next; + +logic [AXI_ADDR_W-1:0] m_axil_addr_reg = '0, m_axil_addr_next; +logic m_axil_awvalid_reg = 1'b0, m_axil_awvalid_next; +logic [AXI_DATA_W-1:0] m_axil_wdata_reg = '0, m_axil_wdata_next; +logic [AXI_STRB_W-1:0] m_axil_wstrb_reg = '0, m_axil_wstrb_next; +logic m_axil_wvalid_reg = 1'b0, m_axil_wvalid_next; +logic m_axil_bready_reg = 1'b0, m_axil_bready_next; +logic m_axil_arvalid_reg = 1'b0, m_axil_arvalid_next; +logic m_axil_rready_reg = 1'b0, m_axil_rready_next; + +logic stat_err_cor_reg = 1'b0, stat_err_cor_next; +logic stat_err_uncor_reg = 1'b0, stat_err_uncor_next; + +// internal datapath +logic [AXIS_PCIE_DATA_W-1:0] m_axis_cc_tdata_int; +logic [AXIS_PCIE_KEEP_W-1:0] m_axis_cc_tkeep_int; +logic m_axis_cc_tvalid_int; +logic m_axis_cc_tready_int_reg = 1'b0; +logic m_axis_cc_tlast_int; +logic [AXIS_PCIE_CC_USER_W-1:0] m_axis_cc_tuser_int; +wire m_axis_cc_tready_int_early; + +assign s_axis_cq.tready = s_axis_cq_tready_reg; + +assign m_axil_wr.awaddr = m_axil_addr_reg; +assign m_axil_wr.awprot = 3'b010; +assign m_axil_wr.awvalid = m_axil_awvalid_reg; +assign m_axil_wr.wdata = m_axil_wdata_reg; +assign m_axil_wr.wstrb = m_axil_wstrb_reg; +assign m_axil_wr.wvalid = m_axil_wvalid_reg; +assign m_axil_wr.bready = m_axil_bready_reg; +assign m_axil_rd.araddr = m_axil_addr_reg; +assign m_axil_rd.arprot = 3'b010; +assign m_axil_rd.arvalid = m_axil_arvalid_reg; +assign m_axil_rd.rready = m_axil_rready_reg; + +assign stat_err_cor = stat_err_cor_reg; +assign stat_err_uncor = stat_err_uncor_reg; + +always_comb begin + state_next = STATE_IDLE; + + s_axis_cq_tready_next = 1'b0; + + dword_count_next = dword_count_reg; + type_next = type_reg; + status_next = status_reg; + requester_id_next = requester_id_reg; + 
tag_next = tag_reg; + tc_next = tc_reg; + attr_next = attr_reg; + first_be_next = first_be_reg; + last_be_next = last_be_reg; + cpl_data_next = cpl_data_reg; + + m_axis_cc_tdata_int = '0; + m_axis_cc_tkeep_int = '0; + m_axis_cc_tvalid_int = 1'b0; + m_axis_cc_tlast_int = 1'b0; + m_axis_cc_tuser_int = '0; + + casez (first_be_reg) + 4'b0000: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b00}; // lower address + 4'bzzz1: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b00}; // lower address + 4'bzz10: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b01}; // lower address + 4'bz100: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b10}; // lower address + 4'b1000: cpl_tlp_hdr[6:0] = {m_axil_addr_reg[6:2], 2'b11}; // lower address + endcase + cpl_tlp_hdr[9:8] = 2'b00; // AT + casez (first_be_reg) + 4'b0000: cpl_tlp_hdr[28:16] = 13'd1; // Byte count + 4'b0001: cpl_tlp_hdr[28:16] = 13'd1; // Byte count + 4'b0010: cpl_tlp_hdr[28:16] = 13'd1; // Byte count + 4'b0100: cpl_tlp_hdr[28:16] = 13'd1; // Byte count + 4'b1000: cpl_tlp_hdr[28:16] = 13'd1; // Byte count + 4'b0011: cpl_tlp_hdr[28:16] = 13'd2; // Byte count + 4'b0110: cpl_tlp_hdr[28:16] = 13'd2; // Byte count + 4'b1100: cpl_tlp_hdr[28:16] = 13'd2; // Byte count + 4'b01z1: cpl_tlp_hdr[28:16] = 13'd3; // Byte count + 4'b1z10: cpl_tlp_hdr[28:16] = 13'd3; // Byte count + 4'b1zz1: cpl_tlp_hdr[28:16] = 13'd4; // Byte count + endcase + cpl_tlp_hdr[42:32] = cpl_data_reg ? 
11'd1 : 11'd0; // DWORD count + cpl_tlp_hdr[45:43] = status_reg; + cpl_tlp_hdr[63:48] = requester_id_reg; + cpl_tlp_hdr[71:64] = tag_reg; + cpl_tlp_hdr[87:72] = completer_id; + cpl_tlp_hdr[88] = completer_id_enable; + cpl_tlp_hdr[91:89] = tc_reg; + cpl_tlp_hdr[94:92] = attr_reg; + cpl_tlp_hdr[95] = 1'b0; // force ECRC + + // CC tuser sideband for 64 through 256-bit interface width + cpl_tuser_1[0] = 1'b0; // discontinue + cpl_tuser_1[32:1] = 32'd0; // parity + + // CC tuser sideband for 512-bit interface width + cpl_tuser_2[1:0] = 2'b01; // is_sop + cpl_tuser_2[3:2] = 2'd0; // is_sop0_ptr + cpl_tuser_2[5:4] = 2'd0; // is_sop1_ptr + cpl_tuser_2[7:6] = 2'b01; // is_eop + cpl_tuser_2[11:8] = cpl_data_reg ? 4'd3 : 4'd2; // is_eop0_ptr + cpl_tuser_2[15:12] = 4'd0; // is_eop1_ptr + cpl_tuser_2[16] = 1'b0; // discontinue + cpl_tuser_2[80:17] = 64'd0; // parity + + if (AXIS_PCIE_DATA_W == 64) begin + m_axis_cc_tdata_int = AXIS_PCIE_DATA_W'(cpl_tlp_hdr[63:0]); + m_axis_cc_tkeep_int = AXIS_PCIE_KEEP_W'(2'b11); + m_axis_cc_tlast_int = 1'b0; + end else begin + m_axis_cc_tdata_int = AXIS_PCIE_DATA_W'({m_axil_rd.rdata, cpl_tlp_hdr}); + m_axis_cc_tkeep_int = AXIS_PCIE_KEEP_W'({cpl_data_reg, 3'b111}); + m_axis_cc_tlast_int = 1'b1; + end + + if (AXIS_PCIE_DATA_W == 512) begin + m_axis_cc_tuser_int = AXIS_PCIE_CC_USER_W'(cpl_tuser_2); + end else begin + m_axis_cc_tuser_int = AXIS_PCIE_CC_USER_W'(cpl_tuser_1); + end + + m_axil_addr_next = m_axil_addr_reg; + m_axil_awvalid_next = m_axil_awvalid_reg && !m_axil_wr.awready; + m_axil_wdata_next = m_axil_wdata_reg; + m_axil_wstrb_next = m_axil_wstrb_reg; + m_axil_wvalid_next = m_axil_wvalid_reg && !m_axil_wr.wready; + m_axil_bready_next = 1'b0; + m_axil_arvalid_next = m_axil_arvalid_reg && !m_axil_rd.arready; + m_axil_rready_next = 1'b0; + + stat_err_cor_next = 1'b0; + stat_err_uncor_next = 1'b0; + + case (state_reg) + STATE_IDLE: begin + // idle state, wait for completion request + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + + 
if (s_axis_cq.tready && s_axis_cq.tvalid) begin + // header fields + m_axil_addr_next = req_tlp_hdr_addr; + if (AXIS_PCIE_DATA_W > 64) begin + dword_count_next = req_tlp_hdr_length; + type_next = req_tlp_hdr_type; + requester_id_next = req_tlp_hdr_requester_id; + tag_next = req_tlp_hdr_tag; + tc_next = req_tlp_hdr_tc; + attr_next = req_tlp_hdr_attr; + + // data + if (AXIS_PCIE_DATA_W >= 256) begin + m_axil_wdata_next = req_tlp_data; + end + end + + first_be_next = req_tlp_hdr_first_be; + last_be_next = req_tlp_hdr_last_be; + + m_axil_wstrb_next = first_be_next; + + cpl_data_next = 1'b1; + status_next = CPL_STATUS_SC; // successful completion + + if (AXIS_PCIE_DATA_W == 64) begin + if (s_axis_cq.tlast) begin + // truncated packet + // report uncorrectable error + stat_err_uncor_next = 1'b1; + state_next = STATE_IDLE; + end else begin + state_next = STATE_HEADER; + end + end else begin + if (type_next == REQ_MEM_READ || type_next == REQ_IO_READ) begin + // read request + cpl_data_next = 1'b1; + if (s_axis_cq.tlast && dword_count_next == 11'd1) begin + m_axil_arvalid_next = 1'b1; + m_axil_rready_next = m_axis_cc_tready_int_early; + s_axis_cq_tready_next = 1'b0; + state_next = STATE_READ; + end else begin + // bad length + cpl_data_next = 1'b0; + status_next = CPL_STATUS_CA; // completer abort + // report correctable error + stat_err_cor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = 1'b0; + state_next = STATE_CPL_1; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end + end else if (type_next == REQ_MEM_WRITE || type_next == REQ_IO_WRITE) begin + // write request + cpl_data_next = 1'b0; + if (AXIS_PCIE_DATA_W >= 256 && s_axis_cq.tlast && dword_count_next == 11'd1) begin + m_axil_awvalid_next = 1'b1; + m_axil_wvalid_next = 1'b1; + m_axil_bready_next = 1'b1; + s_axis_cq_tready_next = 1'b0; + state_next = STATE_WRITE_2; + end else if (AXIS_PCIE_DATA_W < 256 && dword_count_next == 11'd1) begin + 
s_axis_cq_tready_next = 1'b1; + state_next = STATE_WRITE_1; + end else begin + // bad length + status_next = CPL_STATUS_CA; // completer abort + if (type_next == REQ_MEM_WRITE) begin + // memory write - posted, no completion + // report uncorrectable error + stat_err_uncor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end else begin + // IO write - non-posted, send completion + // report correctable error + stat_err_cor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = 1'b0; + state_next = STATE_CPL_1; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end + end + end else begin + // other request + cpl_data_next = 1'b0; + status_next = CPL_STATUS_UR; // unsupported request + if (type_next == REQ_MEM_WRITE || (type_next & 4'b1100) == 4'b1100) begin + // memory write or message - posted, no completion + // report uncorrectable error + stat_err_uncor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end else begin + // other non-posted request, send UR completion + // report correctable error + stat_err_cor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = 1'b0; + state_next = STATE_CPL_1; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end + end + end + end else begin + state_next = STATE_IDLE; + end + end + STATE_HEADER: begin + // header state, handle header (64-bit interface only) + if (AXIS_PCIE_DATA_W == 64) begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + + // header fields + dword_count_next = req_tlp_hdr_length; + type_next = req_tlp_hdr_type; + requester_id_next = req_tlp_hdr_requester_id; + tag_next = 
req_tlp_hdr_tag; + tc_next = req_tlp_hdr_tc; + attr_next = req_tlp_hdr_attr; + + // data + m_axil_wstrb_next = first_be_reg; + + if (s_axis_cq.tready && s_axis_cq.tvalid) begin + if (type_next == REQ_MEM_READ || type_next == REQ_IO_READ) begin + // read request + cpl_data_next = 1'b1; + if (s_axis_cq.tlast && dword_count_next == 11'd1) begin + m_axil_arvalid_next = 1'b1; + m_axil_rready_next = m_axis_cc_tready_int_early; + s_axis_cq_tready_next = 1'b0; + state_next = STATE_READ; + end else begin + // bad length + cpl_data_next = 1'b0; + status_next = CPL_STATUS_CA; // completer abort + // report correctable error + stat_err_cor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = 1'b0; + state_next = STATE_CPL_1; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end + end else if (type_next == REQ_MEM_WRITE || type_next == REQ_IO_WRITE) begin + // write request + cpl_data_next = 1'b0; + if (dword_count_next == 11'd1) begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WRITE_1; + end else begin + // bad length + status_next = CPL_STATUS_CA; // completer abort + if (type_next == REQ_MEM_WRITE) begin + // memory write - posted, no completion + // report uncorrectable error + stat_err_uncor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end else begin + // other non-posted request, send UR completion + // report correctable error + stat_err_cor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = 1'b0; + state_next = STATE_CPL_1; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end + end + end else begin + // other request + cpl_data_next = 1'b0; + status_next = CPL_STATUS_UR; // unsupported request + if (type_next == REQ_MEM_WRITE || (type_next & 4'b1100) == 4'b1100) begin + // memory write or 
message - posted, no completion + // report uncorrectable error + stat_err_uncor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end else begin + // other non-posted request, send UR completion + // report correctable error + stat_err_cor_next = 1'b1; + if (s_axis_cq.tlast) begin + s_axis_cq_tready_next = 1'b0; + state_next = STATE_CPL_1; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end + end + end else begin + state_next = STATE_HEADER; + end + end + end + STATE_READ: begin + // read state, wait for read response + m_axil_rready_next = m_axis_cc_tready_int_early; + + if (m_axil_rd.rready && m_axil_rd.rvalid) begin + // send completion + m_axis_cc_tvalid_int = 1'b1; + m_axil_wdata_next = m_axil_rd.rdata; // latch read data in the idle write-data register: rdata need not stay valid after this handshake, and STATE_CPL_2 sends it later + m_axil_rready_next = 1'b0; + if (AXIS_PCIE_DATA_W == 64) begin + cpl_data_next = 1'b1; + state_next = STATE_CPL_2; + end else begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end + end else begin + state_next = STATE_READ; + end + end + STATE_WRITE_1: begin + // write 1 state, store write data and initiate write + s_axis_cq_tready_next = 1'b1; + + // data + m_axil_wdata_next = req_tlp_data; + + if (s_axis_cq.tready && s_axis_cq.tvalid) begin + if (s_axis_cq.tlast) begin + m_axil_awvalid_next = 1'b1; + m_axil_wvalid_next = 1'b1; + m_axil_bready_next = m_axis_cc_tready_int_early; + s_axis_cq_tready_next = 1'b0; + state_next = STATE_WRITE_2; + end else begin + s_axis_cq_tready_next = 1'b1; + state_next = STATE_WAIT_END; + end + end else begin + state_next = STATE_WRITE_1; + end + end + STATE_WRITE_2: begin + // write 2 state, handle write response + m_axil_bready_next = m_axis_cc_tready_int_early; + + if (m_axil_wr.bready && m_axil_wr.bvalid) begin + m_axil_bready_next = 1'b0; + if (type_reg == REQ_MEM_WRITE) begin + // memory write - posted, no completion +
s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + // IO write - non-posted, send completion + m_axis_cc_tvalid_int = 1'b1; + + if (AXIS_PCIE_DATA_W == 64) begin + state_next = STATE_CPL_2; + end else begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end + end + end else begin + state_next = STATE_WRITE_2; + end + end + STATE_WAIT_END: begin + // wait end state, wait for end of completion request + s_axis_cq_tready_next = 1'b1; + + if (s_axis_cq.tready && s_axis_cq.tvalid) begin + if (s_axis_cq.tlast) begin + // completion + if (type_reg == REQ_MEM_WRITE || (type_reg & 4'b1100) == 4'b1100) begin + // memory write or message - posted, no completion + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + // non-posted request, send completion + m_axis_cc_tvalid_int = 1'b1; + s_axis_cq_tready_next = 1'b0; // stop accepting CQ beats while the completion is pending, otherwise the next request's first beat is consumed and lost; overridden below when returning to idle + if (m_axis_cc_tready_int_reg) begin + if (AXIS_PCIE_DATA_W == 64) begin + state_next = STATE_CPL_2; + end else begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end + end else begin + state_next = STATE_CPL_1; + end + end + end else begin + state_next = STATE_WAIT_END; + end + end else begin + state_next = STATE_WAIT_END; + end + end + STATE_CPL_1: begin + // send completion + m_axis_cc_tvalid_int = 1'b1; + + if (m_axis_cc_tready_int_reg) begin + if (AXIS_PCIE_DATA_W == 64) begin + cpl_data_next = 1'b0; + state_next = STATE_CPL_2; + end else begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end + end else begin + state_next = STATE_CPL_1; + end + end + STATE_CPL_2: begin + // send rest of completion (64-bit interface only) + if (AXIS_PCIE_DATA_W == 64) begin + m_axis_cc_tvalid_int = 1'b1; + m_axis_cc_tdata_int = AXIS_PCIE_DATA_W'({m_axil_wdata_reg, cpl_tlp_hdr[95:64]}); // upper dword is the read data latched in STATE_READ; masked by tkeep when cpl_data_reg is 0 + m_axis_cc_tkeep_int = AXIS_PCIE_KEEP_W'({cpl_data_reg, 1'b1}); + m_axis_cc_tlast_int = 1'b1; + + if
(m_axis_cc_tready_int_reg) begin + s_axis_cq_tready_next = m_axis_cc_tready_int_early; + state_next = STATE_IDLE; + end else begin + state_next = STATE_CPL_2; + end + end + end + endcase +end + +always_ff @(posedge clk) begin + state_reg <= state_next; + + dword_count_reg <= dword_count_next; + type_reg <= type_next; + tag_reg <= tag_next; + status_reg <= status_next; + requester_id_reg <= requester_id_next; + tc_reg <= tc_next; + attr_reg <= attr_next; + first_be_reg <= first_be_next; + last_be_reg <= last_be_next; + cpl_data_reg <= cpl_data_next; + + s_axis_cq_tready_reg <= s_axis_cq_tready_next; + + m_axil_addr_reg <= m_axil_addr_next; + m_axil_awvalid_reg <= m_axil_awvalid_next; + m_axil_wdata_reg <= m_axil_wdata_next; + m_axil_wstrb_reg <= m_axil_wstrb_next; + m_axil_wvalid_reg <= m_axil_wvalid_next; + m_axil_bready_reg <= m_axil_bready_next; + m_axil_arvalid_reg <= m_axil_arvalid_next; + m_axil_rready_reg <= m_axil_rready_next; + + stat_err_cor_reg <= stat_err_cor_next; + stat_err_uncor_reg <= stat_err_uncor_next; + + if (rst) begin + state_reg <= STATE_IDLE; + s_axis_cq_tready_reg <= 1'b0; + + m_axil_awvalid_reg <= 1'b0; + m_axil_wvalid_reg <= 1'b0; + m_axil_bready_reg <= 1'b0; + m_axil_arvalid_reg <= 1'b0; + m_axil_rready_reg <= 1'b0; + + stat_err_cor_reg <= 1'b0; + stat_err_uncor_reg <= 1'b0; + end +end + +// output datapath logic +reg [AXIS_PCIE_DATA_W-1:0] m_axis_cc_tdata_reg = '0; +reg [AXIS_PCIE_KEEP_W-1:0] m_axis_cc_tkeep_reg = '0; +reg m_axis_cc_tvalid_reg = 1'b0, m_axis_cc_tvalid_next; +reg m_axis_cc_tlast_reg = 1'b0; +reg [AXIS_PCIE_CC_USER_W-1:0] m_axis_cc_tuser_reg = '0; + +reg [AXIS_PCIE_DATA_W-1:0] temp_m_axis_cc_tdata_reg = '0; +reg [AXIS_PCIE_KEEP_W-1:0] temp_m_axis_cc_tkeep_reg = '0; +reg temp_m_axis_cc_tvalid_reg = 1'b0, temp_m_axis_cc_tvalid_next; +reg temp_m_axis_cc_tlast_reg = 1'b0; +reg [AXIS_PCIE_CC_USER_W-1:0] temp_m_axis_cc_tuser_reg = '0; + +// datapath control +reg store_axis_int_to_output; +reg store_axis_int_to_temp; +reg 
store_axis_temp_to_output; + +assign m_axis_cc.tdata = m_axis_cc_tdata_reg; +assign m_axis_cc.tkeep = m_axis_cc_tkeep_reg; +assign m_axis_cc.tstrb = m_axis_cc.tkeep; +assign m_axis_cc.tvalid = m_axis_cc_tvalid_reg; +assign m_axis_cc.tlast = m_axis_cc_tlast_reg; +assign m_axis_cc.tuser = m_axis_cc_tuser_reg; +assign m_axis_cc.tid = '0; +assign m_axis_cc.tdest = '0; + +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_cc_tready_int_early = m_axis_cc.tready || (!temp_m_axis_cc_tvalid_reg && !m_axis_cc_tvalid_reg); + +always_comb begin + // transfer sink ready state to source + m_axis_cc_tvalid_next = m_axis_cc_tvalid_reg; + temp_m_axis_cc_tvalid_next = temp_m_axis_cc_tvalid_reg; + + store_axis_int_to_output = 1'b0; + store_axis_int_to_temp = 1'b0; + store_axis_temp_to_output = 1'b0; + + if (m_axis_cc_tready_int_reg) begin + // input is ready + if (m_axis_cc.tready || !m_axis_cc_tvalid_reg) begin + // output is ready or currently not valid, transfer data to output + m_axis_cc_tvalid_next = m_axis_cc_tvalid_int; + store_axis_int_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_m_axis_cc_tvalid_next = m_axis_cc_tvalid_int; + store_axis_int_to_temp = 1'b1; + end + end else if (m_axis_cc.tready) begin + // input is not ready, but output is ready + m_axis_cc_tvalid_next = temp_m_axis_cc_tvalid_reg; + temp_m_axis_cc_tvalid_next = 1'b0; + store_axis_temp_to_output = 1'b1; + end +end + +always_ff @(posedge clk) begin + m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next; + m_axis_cc_tready_int_reg <= m_axis_cc_tready_int_early; + temp_m_axis_cc_tvalid_reg <= temp_m_axis_cc_tvalid_next; + + // datapath + if (store_axis_int_to_output) begin + m_axis_cc_tdata_reg <= m_axis_cc_tdata_int; + m_axis_cc_tkeep_reg <= m_axis_cc_tkeep_int; + m_axis_cc_tlast_reg <= m_axis_cc_tlast_int; + m_axis_cc_tuser_reg <= m_axis_cc_tuser_int; + end else if (store_axis_temp_to_output) begin + m_axis_cc_tdata_reg <= 
temp_m_axis_cc_tdata_reg; + m_axis_cc_tkeep_reg <= temp_m_axis_cc_tkeep_reg; + m_axis_cc_tlast_reg <= temp_m_axis_cc_tlast_reg; + m_axis_cc_tuser_reg <= temp_m_axis_cc_tuser_reg; + end + + if (store_axis_int_to_temp) begin + temp_m_axis_cc_tdata_reg <= m_axis_cc_tdata_int; + temp_m_axis_cc_tkeep_reg <= m_axis_cc_tkeep_int; + temp_m_axis_cc_tlast_reg <= m_axis_cc_tlast_int; + temp_m_axis_cc_tuser_reg <= m_axis_cc_tuser_int; + end + + if (rst) begin + m_axis_cc_tvalid_reg <= 1'b0; + m_axis_cc_tready_int_reg <= 1'b0; + temp_m_axis_cc_tvalid_reg <= 1'b0; + end +end + +endmodule + +`resetall diff --git a/src/pcie/tb/taxi_pcie_us_axil_master/Makefile b/src/pcie/tb/taxi_pcie_us_axil_master/Makefile new file mode 100644 index 0000000..37c43e3 --- /dev/null +++ b/src/pcie/tb/taxi_pcie_us_axil_master/Makefile @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: CERN-OHL-S-2.0 +# +# Copyright (c) 2021-2025 FPGA Ninja, LLC +# +# Authors: +# - Alex Forencich + +TOPLEVEL_LANG = verilog + +SIM ?= verilator +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +RTL_DIR = ../../rtl +LIB_DIR = ../../lib +TAXI_SRC_DIR = $(LIB_DIR)/taxi/src + +DUT = taxi_pcie_us_axil_master +COCOTB_TEST_MODULES = test_$(DUT) +COCOTB_TOPLEVEL = test_$(DUT) +MODULE = $(COCOTB_TEST_MODULES) +TOPLEVEL = $(COCOTB_TOPLEVEL) +VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv +VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv +VERILOG_SOURCES += $(TAXI_SRC_DIR)/axis/rtl/taxi_axis_if.sv +VERILOG_SOURCES += $(TAXI_SRC_DIR)/axi/rtl/taxi_axil_if.sv + +# handle file list files +process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1))) +process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f)) +uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1)) +VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES))) + +# module parameters +export PARAM_AXIS_PCIE_DATA_W := 64 +export 
PARAM_AXIS_PCIE_CQ_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),88,183) +export PARAM_AXIS_PCIE_CC_USER_W := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_W)),33,81) +export PARAM_AXIL_DATA_W := 32 +export PARAM_AXIL_ADDR_W := 64 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) +else ifeq ($(SIM), verilator) + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + VERILATOR_TRACE = 1 + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim diff --git a/src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.py b/src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.py new file mode 100644 index 0000000..bacd65d --- /dev/null +++ b/src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: CERN-OHL-S-2.0 +""" + +Copyright (c) 2020-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +""" + +import itertools +import logging +import os +import re +from contextlib import contextmanager + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer +from cocotb.regression import TestFactory + +from cocotbext.axi import AxiStreamBus +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice +from cocotbext.axi import AxiLiteBus, AxiLiteRam + + +@contextmanager +def assert_raises(exc_type, pattern=None): + try: + yield + except exc_type as e: + if pattern: + assert re.match(pattern, str(e)), \ + "Correct exception type caught, but message did not match pattern" + pass + else: + raise AssertionError("{} was not raised".format(exc_type.__name__)) + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = 
logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + # PCIe + self.rc = RootComplex() + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=2, + # user_clk_frequency=250e6, + alignment="dword", + cq_straddle=False, + cc_straddle=False, + rq_straddle=False, + rc_straddle=False, + rc_4tlp_straddle=False, + pf_count=1, + max_payload_size=1024, + enable_client_tag=True, + enable_extended_tag=True, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + pf0_msi_enable=True, + pf0_msi_count=32, + pf1_msi_enable=False, + pf1_msi_count=1, + pf2_msi_enable=False, + pf2_msi_count=1, + pf3_msi_enable=False, + pf3_msi_count=1, + pf0_msix_enable=False, + pf0_msix_table_size=0, + pf0_msix_table_bir=0, + pf0_msix_table_offset=0x00000000, + pf0_msix_pba_bir=0, + pf0_msix_pba_offset=0x00000000, + pf1_msix_enable=False, + pf1_msix_table_size=0, + pf1_msix_table_bir=0, + pf1_msix_table_offset=0x00000000, + pf1_msix_pba_bir=0, + pf1_msix_pba_offset=0x00000000, + pf2_msix_enable=False, + pf2_msix_table_size=0, + pf2_msix_table_bir=0, + pf2_msix_table_offset=0x00000000, + pf2_msix_pba_bir=0, + pf2_msix_pba_offset=0x00000000, + pf3_msix_enable=False, + pf3_msix_table_size=0, + pf3_msix_table_bir=0, + pf3_msix_table_offset=0x00000000, + pf3_msix_pba_bir=0, + pf3_msix_pba_offset=0x00000000, + + # signals + user_clk=dut.clk, + user_reset=dut.rst, + + cq_bus=AxiStreamBus.from_entity(dut.s_axis_cq), + + cc_bus=AxiStreamBus.from_entity(dut.m_axis_cc) + ) + + self.dev.log.setLevel(logging.DEBUG) + + self.dev.functions[0].configure_bar(0, 16*1024*1024) + self.dev.functions[0].configure_bar(1, 16*1024, io=True) + + self.rc.make_port().connect(self.dev) + + # AXI + self.axil_ram = AxiLiteRam(AxiLiteBus.from_entity(dut.m_axil), dut.clk, dut.rst, size=2**16) + + dut.completer_id.setimmediatevalue(0) + dut.completer_id_enable.setimmediatevalue(0) + + # monitor error 
outputs + self.stat_err_cor_asserted = False + self.stat_err_uncor_asserted = False + cocotb.start_soon(self._run_monitor_stat_err_cor()) + cocotb.start_soon(self._run_monitor_stat_err_uncor()) + + def set_idle_generator(self, generator=None): + if generator: + self.dev.cq_source.set_pause_generator(generator()) + self.axil_ram.write_if.b_channel.set_pause_generator(generator()) + self.axil_ram.read_if.r_channel.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + self.dev.cc_sink.set_pause_generator(generator()) + self.axil_ram.write_if.aw_channel.set_pause_generator(generator()) + self.axil_ram.write_if.w_channel.set_pause_generator(generator()) + self.axil_ram.read_if.ar_channel.set_pause_generator(generator()) + + async def _run_monitor_stat_err_cor(self): + while True: + await RisingEdge(self.dut.stat_err_cor) + self.log.info("stat_err_cor (correctable error) was asserted") + self.stat_err_cor_asserted = True + + async def _run_monitor_stat_err_uncor(self): + while True: + await RisingEdge(self.dut.stat_err_uncor) + self.log.info("stat_err_uncor (uncorrectable error) was asserted") + self.stat_err_uncor_asserted = True + + +async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await FallingEdge(dut.rst) + await Timer(100, 'ns') + + await tb.rc.enumerate() + + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() + + dev_bar0 = dev.bar_window[0] + dev_bar1 = dev.bar_window[1] + + for length in range(0, 5): + for pcie_offset in range(4-length+1): + tb.log.info("length %d, pcie_offset %d", length, pcie_offset) + pcie_addr = pcie_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256)) + + await dev_bar0.write(pcie_addr, test_data) + + await Timer(100, 'ns') + + 
tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48)) + + assert tb.axil_ram.read(pcie_addr-1, len(test_data)+2) == b'\x55'+test_data+b'\x55' + + assert not tb.stat_err_cor_asserted + assert not tb.stat_err_uncor_asserted + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await FallingEdge(dut.rst) + await Timer(100, 'ns') + + await tb.rc.enumerate() + + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() + + dev_bar0 = dev.bar_window[0] + dev_bar1 = dev.bar_window[1] + + for length in range(0, 5): + for pcie_offset in range(4-length+1): + tb.log.info("length %d, pcie_offset %d", length, pcie_offset) + pcie_addr = pcie_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256)) + tb.axil_ram.write(pcie_addr, test_data) + + tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48)) + + val = await dev_bar0.read(pcie_addr, len(test_data), timeout=1000, timeout_unit='ns') + + tb.log.debug("read data: %s", val) + + assert val == test_data + + assert not tb.stat_err_cor_asserted + assert not tb.stat_err_uncor_asserted + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_io_write(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await FallingEdge(dut.rst) + await Timer(100, 'ns') + + await tb.rc.enumerate() + + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() + + dev_bar0 = dev.bar_window[0] + dev_bar1 = dev.bar_window[1] + + for length in range(1, 5): + for pcie_offset in 
range(4-length+1): + tb.log.info("length %d, pcie_offset %d", length, pcie_offset) + pcie_addr = pcie_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256)) + + await dev_bar1.write(pcie_addr, test_data, timeout=1000, timeout_unit='ns') + + tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + assert tb.axil_ram.read(pcie_addr-1, len(test_data)+2) == b'\x55'+test_data+b'\x55' + + assert not tb.stat_err_cor_asserted + assert not tb.stat_err_uncor_asserted + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_io_read(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await FallingEdge(dut.rst) + await Timer(100, 'ns') + + await tb.rc.enumerate() + + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() + + dev_bar0 = dev.bar_window[0] + dev_bar1 = dev.bar_window[1] + + for length in range(1, 5): + for pcie_offset in range(4-length+1): + tb.log.info("length %d, pcie_offset %d", length, pcie_offset) + pcie_addr = pcie_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256)) + tb.axil_ram.write(pcie_addr, test_data) + + tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + val = await dev_bar1.read(pcie_addr, len(test_data), timeout=1000, timeout_unit='ns') + + tb.log.debug("read data: %s", val) + + assert val == test_data + + assert not tb.stat_err_cor_asserted + assert not tb.stat_err_uncor_asserted + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_bad_ops(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + 
tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await FallingEdge(dut.rst) + await Timer(100, 'ns') + + await tb.rc.enumerate() + + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + await dev.enable_device() + + dev_bar0 = dev.bar_window[0] + dev_bar1 = dev.bar_window[1] + + tb.log.info("Test bad write") + + length = 32 + pcie_addr = 0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256)) + + await dev_bar0.write(pcie_addr, test_data) + + await Timer(100, 'ns') + + tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + assert tb.axil_ram.read(pcie_addr-1, len(test_data)+2) == b'\x55'*(len(test_data)+2) + + assert not tb.stat_err_cor_asserted + assert tb.stat_err_uncor_asserted + + tb.stat_err_cor_asserted = False + tb.stat_err_uncor_asserted = False + + tb.log.info("Test bad read") + + length = 32 + pcie_addr = 0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axil_ram.write(pcie_addr-128, b'\x55'*(len(test_data)+256)) + tb.axil_ram.write(pcie_addr, test_data) + + tb.log.debug("%s", tb.axil_ram.hexdump_str((pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + with assert_raises(Exception, "Unsuccessful completion"): + val = await dev_bar0.read(pcie_addr, len(test_data), timeout=1000, timeout_unit='ns') + + assert tb.stat_err_cor_asserted + assert not tb.stat_err_uncor_asserted + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +if cocotb.SIM_NAME: + + for test in [ + run_test_write, + run_test_read, + run_test_io_write, + run_test_io_read, + run_test_bad_ops + ]: + + factory = TestFactory(test) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + 
factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.abspath(os.path.dirname(__file__)) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) +lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib')) +taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src')) + + +def process_f_files(files): + lst = {} + for f in files: + if f[-2:].lower() == '.f': + with open(f, 'r') as fp: + l = fp.read().split() + for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]): + lst[os.path.basename(f)] = f + else: + lst[os.path.basename(f)] = f + return list(lst.values()) + + +@pytest.mark.parametrize("axis_pcie_data_w", [64, 128, 256, 512]) +def test_taxi_pcie_us_axil_master(request, axis_pcie_data_w): + dut = "taxi_pcie_us_axil_master" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = module + + verilog_sources = [ + os.path.join(tests_dir, f"{toplevel}.sv"), + os.path.join(rtl_dir, f"{dut}.sv"), + os.path.join(taxi_src_dir, "axis", "rtl", "taxi_axis_if.sv"), + os.path.join(taxi_src_dir, "axi", "rtl", "taxi_axil_if.sv"), + ] + + verilog_sources = process_f_files(verilog_sources) + + parameters = {} + + parameters['AXIS_PCIE_DATA_W'] = axis_pcie_data_w + parameters['AXIS_PCIE_CQ_USER_W'] = 88 if parameters['AXIS_PCIE_DATA_W'] < 512 else 183 + parameters['AXIS_PCIE_CC_USER_W'] = 33 if parameters['AXIS_PCIE_DATA_W'] < 512 else 81 + parameters['AXIL_DATA_W'] = 32 + parameters['AXIL_ADDR_W'] = 64 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + extra_env['COCOTB_RESOLVE_X'] = 'RANDOM' + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + simulator="verilator", + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + ) diff --git 
a/src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.sv b/src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.sv new file mode 100644 index 0000000..3e8519c --- /dev/null +++ b/src/pcie/tb/taxi_pcie_us_axil_master/test_taxi_pcie_us_axil_master.sv @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * UltraScale PCIe AXI Lite Master testbench + */ +module test_taxi_pcie_us_axil_master # +( + /* verilator lint_off WIDTHTRUNC */ + parameter AXIS_PCIE_DATA_W = 64, + parameter AXIS_PCIE_CQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 85 : 183, + parameter AXIS_PCIE_CC_USER_W = AXIS_PCIE_DATA_W < 512 ? 33 : 81, + parameter AXIL_DATA_W = 32, + parameter AXIL_ADDR_W = 64 + /* verilator lint_on WIDTHTRUNC */ +) +(); + +localparam AXIS_PCIE_KEEP_W = (AXIS_PCIE_DATA_W/32); + +logic clk; +logic rst; + +taxi_axis_if #( + .DATA_W(AXIS_PCIE_DATA_W), + .KEEP_EN(1), + .KEEP_W(AXIS_PCIE_KEEP_W), + .USER_EN(1), + .USER_W(AXIS_PCIE_CQ_USER_W) +) s_axis_cq(); + +taxi_axis_if #( + .DATA_W(AXIS_PCIE_DATA_W), + .KEEP_EN(1), + .KEEP_W(AXIS_PCIE_KEEP_W), + .USER_EN(1), + .USER_W(AXIS_PCIE_CC_USER_W) +) m_axis_cc(); + +taxi_axil_if #( + .DATA_W(AXIL_DATA_W), + .ADDR_W(AXIL_ADDR_W), + .AWUSER_EN(1'b0), + .WUSER_EN(1'b0), + .BUSER_EN(1'b0), + .ARUSER_EN(1'b0), + .RUSER_EN(1'b0) +) m_axil(); + +logic [15:0] completer_id; +logic completer_id_enable; + +logic stat_err_cor; +logic stat_err_uncor; + +taxi_pcie_us_axil_master +uut ( + .clk(clk), + .rst(rst), + + /* + * UltraScale PCIe interface + */ + .s_axis_cq(s_axis_cq), + .m_axis_cc(m_axis_cc), + + /* + * AXI Lite Master output + */ + .m_axil_wr(m_axil), + .m_axil_rd(m_axil), + + /* + * Configuration + */ + .completer_id(completer_id), + .completer_id_enable(completer_id_enable), + + /* + * Status + */ + .stat_err_cor(stat_err_cor), + .stat_err_uncor(stat_err_uncor) +); + 
+endmodule + +`resetall