diff --git a/README.md b/README.md index 6730b10..8c96a25 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ To facilitate the dual-license model, contributions to the project can only be a * AXI * SV interface for AXI * AXI to AXI lite adapter + * Crossbar * Interconnect * Register slice * Width converter
diff --git a/src/axi/rtl/taxi_axi_crossbar.f b/src/axi/rtl/taxi_axi_crossbar.f
new file mode 100644
index 0000000..92a643a
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar.f
@@ -0,0 +1,3 @@
+taxi_axi_crossbar.sv
+taxi_axi_crossbar_wr.f
+taxi_axi_crossbar_rd.f
diff --git a/src/axi/rtl/taxi_axi_crossbar.sv b/src/axi/rtl/taxi_axi_crossbar.sv
new file mode 100644
index 0000000..c542399
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar.sv
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2018-2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI4 crossbar
+ *
+ * Structural wrapper around the write crossbar (taxi_axi_crossbar_wr) and the
+ * read crossbar (taxi_axi_crossbar_rd); both receive the same address map and
+ * limits, with separate connectivity masks (M_CONNECT_WR / M_CONNECT_RD).
+ */
+module taxi_axi_crossbar #
+(
+    // Number of AXI inputs (slave interfaces)
+    parameter S_COUNT = 4,
+    // Number of AXI outputs (master interfaces)
+    parameter M_COUNT = 4,
+    // Address width in bits for address decoding
+    parameter ADDR_W = 32,
+    // TODO fix parametrization once verilator issue 5890 is fixed
+    // Number of concurrent unique IDs for each slave interface
+    // S_COUNT concatenated fields of 32 bits
+    parameter S_THREADS = {S_COUNT{32'd2}},
+    // Number of concurrent operations for each slave interface
+    // S_COUNT concatenated fields of 32 bits
+    parameter S_ACCEPT = {S_COUNT{32'd16}},
+    // Number of regions per master interface
+    parameter M_REGIONS = 1,
+    // Master interface base addresses
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_W bits
+    // set to zero for default addressing based on M_ADDR_W
+    parameter M_BASE_ADDR = '0,
+    // Master interface address widths
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits
+    parameter M_ADDR_W = {M_COUNT{{M_REGIONS{32'd24}}}},
+    // Read connections between interfaces
+    // M_COUNT concatenated fields of S_COUNT bits
+    parameter M_CONNECT_RD = {M_COUNT{{S_COUNT{1'b1}}}},
+    // Write connections between interfaces
+    // M_COUNT concatenated fields of S_COUNT bits
+    parameter M_CONNECT_WR = {M_COUNT{{S_COUNT{1'b1}}}},
+    // Number of concurrent operations for each master interface
+    // M_COUNT concatenated fields of 32 bits
+    parameter M_ISSUE = {M_COUNT{32'd4}},
+    // Secure master (fail operations based on awprot/arprot)
+    // M_COUNT bits
+    parameter M_SECURE = {M_COUNT{1'b0}},
+    // Slave interface AW channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_AW_REG_TYPE = {S_COUNT{2'd0}},
+    // Slave interface W channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_W_REG_TYPE = {S_COUNT{2'd0}},
+    // Slave interface B channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_B_REG_TYPE = {S_COUNT{2'd1}},
+    // Slave interface AR channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_AR_REG_TYPE = {S_COUNT{2'd0}},
+    // Slave interface R channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_R_REG_TYPE = {S_COUNT{2'd2}},
+    // Master interface AW channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_AW_REG_TYPE = {M_COUNT{2'd1}},
+    // Master interface W channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_W_REG_TYPE = {M_COUNT{2'd2}},
+    // Master interface B channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_B_REG_TYPE = {M_COUNT{2'd0}},
+    // Master interface AR channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_AR_REG_TYPE = {M_COUNT{2'd1}},
+    // Master interface R channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_R_REG_TYPE = {M_COUNT{2'd0}}
+)
+(
+    input wire logic clk,
+    input wire logic rst,
+
+    /*
+     * AXI4 slave interfaces
+     */
+    taxi_axi_if.wr_slv s_axi_wr[S_COUNT],
+    taxi_axi_if.rd_slv s_axi_rd[S_COUNT],
+
+    /*
+     * AXI4 master interfaces
+     */
+    taxi_axi_if.wr_mst m_axi_wr[M_COUNT],
+    taxi_axi_if.rd_mst m_axi_rd[M_COUNT]
+);
+
+// write path; slave-side and master-side register types are both passed through
+taxi_axi_crossbar_wr #(
+    .S_COUNT(S_COUNT),
+    .M_COUNT(M_COUNT),
+    .ADDR_W(ADDR_W),
+    .S_THREADS(S_THREADS),
+    .S_ACCEPT(S_ACCEPT),
+    .M_REGIONS(M_REGIONS),
+    .M_BASE_ADDR(M_BASE_ADDR),
+    .M_ADDR_W(M_ADDR_W),
+    .M_CONNECT(M_CONNECT_WR),
+    .M_ISSUE(M_ISSUE),
+    .M_SECURE(M_SECURE),
+    .S_AW_REG_TYPE(S_AW_REG_TYPE),
+    .S_W_REG_TYPE (S_W_REG_TYPE),
+    .S_B_REG_TYPE (S_B_REG_TYPE),
+    .M_AW_REG_TYPE(M_AW_REG_TYPE),
+    .M_W_REG_TYPE (M_W_REG_TYPE),
+    .M_B_REG_TYPE (M_B_REG_TYPE)
+)
+wr_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * AXI slave interfaces
+     */
+    .s_axi_wr(s_axi_wr),
+
+    /*
+     * AXI master interfaces
+     */
+    .m_axi_wr(m_axi_wr)
+);
+
+// read path; slave-side and master-side register types are both passed through
+taxi_axi_crossbar_rd #(
+    .S_COUNT(S_COUNT),
+    .M_COUNT(M_COUNT),
+    .ADDR_W(ADDR_W),
+    .S_THREADS(S_THREADS),
+    .S_ACCEPT(S_ACCEPT),
+    .M_REGIONS(M_REGIONS),
+    .M_BASE_ADDR(M_BASE_ADDR),
+    .M_ADDR_W(M_ADDR_W),
+    .M_CONNECT(M_CONNECT_RD),
+    .M_ISSUE(M_ISSUE),
+    .M_SECURE(M_SECURE),
+    .S_AR_REG_TYPE(S_AR_REG_TYPE),
+    .S_R_REG_TYPE (S_R_REG_TYPE),
+    .M_AR_REG_TYPE(M_AR_REG_TYPE),
+    .M_R_REG_TYPE (M_R_REG_TYPE)
+)
+rd_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * AXI slave interfaces
+     */
+    .s_axi_rd(s_axi_rd),
+
+    /*
+     * AXI master interfaces
+     */
+    .m_axi_rd(m_axi_rd)
+);
+
+endmodule
+
+`resetall
diff --git a/src/axi/rtl/taxi_axi_crossbar_addr.sv b/src/axi/rtl/taxi_axi_crossbar_addr.sv
new file mode 100644
index 0000000..f4041bf
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar_addr.sv
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2018-2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI4 crossbar address decode and admission
control
+ */
+module taxi_axi_crossbar_addr #
+(
+    // Slave interface index (bit position checked in each M_CONNECT field)
+    parameter S = 0,
+    // Number of AXI inputs (slave interfaces)
+    parameter S_COUNT = 4,
+    // Number of AXI outputs (master interfaces)
+    parameter M_COUNT = 4,
+    // Select signal width
+    parameter SEL_W = $clog2(M_COUNT),
+    // Address width in bits for address decoding
+    parameter ADDR_W = 32,
+    // ID field width
+    parameter ID_W = 8,
+    // TODO fix parametrization once verilator issue 5890 is fixed
+    // Number of concurrent unique IDs
+    parameter S_THREADS = 32'd2,
+    // Number of concurrent operations
+    parameter S_ACCEPT = 32'd16,
+    // Number of regions per master interface
+    parameter M_REGIONS = 1,
+    // Master interface base addresses
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_W bits
+    // set to zero for default addressing based on M_ADDR_W
+    parameter M_BASE_ADDR = '0,
+    // Master interface address widths
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits
+    parameter M_ADDR_W = {M_COUNT{{M_REGIONS{32'd24}}}},
+    // Connections between interfaces
+    // M_COUNT concatenated fields of S_COUNT bits
+    parameter M_CONNECT = {M_COUNT{{S_COUNT{1'b1}}}},
+    // Secure master (fail operations based on awprot/arprot)
+    // M_COUNT bits
+    parameter M_SECURE = {M_COUNT{1'b0}},
+    // Enable write command output (when nonzero, m_wc_valid is raised for each decoded request)
+    parameter WC_OUTPUT = 0
+)
+(
+    input wire logic clk,
+    input wire logic rst,
+
+    /*
+     * Address input
+     */
+    input wire logic [ID_W-1:0] s_axi_aid,
+    input wire logic [ADDR_W-1:0] s_axi_aaddr,
+    input wire logic [2:0] s_axi_aprot,
+    input wire logic [3:0] s_axi_aqos,
+    input wire logic s_axi_avalid,
+    output wire logic s_axi_aready,
+
+    /*
+     * Address output
+     */
+    output wire logic [3:0] m_axi_aregion,
+    output wire logic [SEL_W-1:0] m_select,
+    output wire logic m_axi_avalid,
+    input wire logic m_axi_aready,
+
+    /*
+     * Write command output
+     */
+    output wire logic [SEL_W-1:0] m_wc_select,
+    output wire logic m_wc_decerr,
+    output wire logic m_wc_valid,
+    input wire logic m_wc_ready,
+
+    /*
+     * Reply command output
+     */
+    output wire logic m_rc_decerr,
+    output wire logic m_rc_valid,
+    input wire logic m_rc_ready,
+
+    /*
+     * Completion input
+     */
+    input wire logic [ID_W-1:0] s_cpl_id,
+    input wire logic s_cpl_valid
+);
+
+localparam CL_S_COUNT = $clog2(S_COUNT);
+localparam CL_M_COUNT = $clog2(M_COUNT);
+localparam CL_S_COUNT_INT = CL_S_COUNT > 0 ? CL_S_COUNT : 1;
+localparam CL_M_COUNT_INT = CL_M_COUNT > 0 ? CL_M_COUNT : 1;
+
+localparam [M_COUNT*M_REGIONS-1:0][31:0] M_ADDR_W_INT = M_ADDR_W;
+localparam [M_COUNT-1:0][S_COUNT-1:0] M_CONNECT_INT = M_CONNECT;
+localparam [M_COUNT-1:0] M_SECURE_INT = M_SECURE;
+
+localparam S_INT_THREADS = S_THREADS > S_ACCEPT ? S_ACCEPT : S_THREADS;
+localparam CL_S_INT_THREADS = $clog2(S_INT_THREADS);
+localparam CL_S_ACCEPT = $clog2(S_ACCEPT);
+
+// default address computation: regions are packed in index order, each base aligned up to its size; zero-width regions are skipped
+function [M_COUNT*M_REGIONS-1:0][ADDR_W-1:0] calcBaseAddrs(input [31:0] dummy);
+    logic [ADDR_W-1:0] base;
+    logic [ADDR_W-1:0] width;
+    logic [ADDR_W-1:0] size;
+    logic [ADDR_W-1:0] mask;
+    begin
+        calcBaseAddrs = '0;
+        base = 0;
+        for (integer i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin
+            width = M_ADDR_W_INT[i];
+            mask = {ADDR_W{1'b1}} >> (ADDR_W - width);
+            size = mask + 1;
+            if (width > 0) begin
+                if ((base & mask) != 0) begin
+                    base = base + size - (base & mask); // align
+                end
+                calcBaseAddrs[i] = base;
+                base = base + size; // increment
+            end
+        end
+    end
+endfunction
+
+localparam [M_COUNT*M_REGIONS-1:0][ADDR_W-1:0] M_BASE_ADDR_INT = M_BASE_ADDR != 0 ? (M_COUNT*M_REGIONS*ADDR_W)'(M_BASE_ADDR) : calcBaseAddrs(0);
+
+// check configuration
+if (M_REGIONS < 1 || M_REGIONS > 16)
+    $fatal(0, "Error: M_REGIONS must be between 1 and 16 (instance %m)");
+
+if (S_ACCEPT < 1)
+    $fatal(0, "Error: need at least 1 accept (instance %m)");
+
+if (S_THREADS < 1)
+    $fatal(0, "Error: need at least 1 thread (instance %m)");
+
+initial begin
+    if (S_THREADS > S_ACCEPT) begin
+        $warning("Warning: requested thread count larger than accept count; limiting thread count to accept count (instance %m)");
+    end
+
+    for (integer i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin
+        if (M_ADDR_W_INT[i] != 0 && (M_ADDR_W_INT[i] < 12 || M_ADDR_W_INT[i] > ADDR_W)) begin
+            $error("Error: address width out of range (instance %m)");
+            $finish;
+        end
+    end
+
+    $display("Addressing configuration for axi_crossbar_addr instance %m");
+    for (integer i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin
+        if (M_ADDR_W_INT[i] != 0) begin
+            $display("%2d (%2d): %x / %02d -- %x-%x",
+                i/M_REGIONS, i%M_REGIONS,
+                M_BASE_ADDR_INT[i],
+                M_ADDR_W_INT[i],
+                M_BASE_ADDR_INT[i] & ({ADDR_W{1'b1}} << M_ADDR_W_INT[i]),
+                M_BASE_ADDR_INT[i] | ({ADDR_W{1'b1}} >> (ADDR_W - M_ADDR_W_INT[i]))
+            );
+        end
+    end
+
+    for (integer i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin
+        if ((M_BASE_ADDR_INT[i] & (2**M_ADDR_W_INT[i]-1)) != 0) begin
+            $display("Region not aligned:");
+            $display("%2d (%2d): %x / %2d -- %x-%x",
+                i/M_REGIONS, i%M_REGIONS,
+                M_BASE_ADDR_INT[i],
+                M_ADDR_W_INT[i],
+                M_BASE_ADDR_INT[i] & ({ADDR_W{1'b1}} << M_ADDR_W_INT[i]),
+                M_BASE_ADDR_INT[i] | ({ADDR_W{1'b1}} >> (ADDR_W - M_ADDR_W_INT[i]))
+            );
+            $error("Error: address range not aligned (instance %m)");
+            $finish;
+        end
+    end
+
+    for (integer i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin
+        for (integer j = i+1; j < M_COUNT*M_REGIONS; j = j + 1) begin
+            if (M_ADDR_W_INT[i] != 0 && M_ADDR_W_INT[j] != 0) begin
+                if (((M_BASE_ADDR_INT[i] & ({ADDR_W{1'b1}} << M_ADDR_W_INT[i])) <= (M_BASE_ADDR_INT[j] | ({ADDR_W{1'b1}} >> (ADDR_W - M_ADDR_W_INT[j]))))
+                        && ((M_BASE_ADDR_INT[j] & ({ADDR_W{1'b1}} << M_ADDR_W_INT[j])) <= (M_BASE_ADDR_INT[i] | ({ADDR_W{1'b1}} >> (ADDR_W - M_ADDR_W_INT[i]))))) begin
+                    $display("Overlapping regions:");
+                    $display("%2d (%2d): %x / %2d -- %x-%x",
+                        i/M_REGIONS, i%M_REGIONS,
+                        M_BASE_ADDR_INT[i],
+                        M_ADDR_W_INT[i],
+                        M_BASE_ADDR_INT[i] & ({ADDR_W{1'b1}} << M_ADDR_W_INT[i]),
+                        M_BASE_ADDR_INT[i] | ({ADDR_W{1'b1}} >> (ADDR_W - M_ADDR_W_INT[i]))
+                    );
+                    $display("%2d (%2d): %x / %2d -- %x-%x",
+                        j/M_REGIONS, j%M_REGIONS,
+                        M_BASE_ADDR_INT[j],
+                        M_ADDR_W_INT[j],
+                        M_BASE_ADDR_INT[j] & ({ADDR_W{1'b1}} << M_ADDR_W_INT[j]),
+                        M_BASE_ADDR_INT[j] | ({ADDR_W{1'b1}} >> (ADDR_W - M_ADDR_W_INT[j]))
+                    );
+                    $error("Error: address ranges overlap (instance %m)");
+                    $finish;
+                end
+            end
+        end
+    end
+end
+
+localparam logic [0:0]
+    STATE_IDLE = 1'd0,
+    STATE_DECODE = 1'd1;
+
+logic [0:0] state_reg = STATE_IDLE, state_next;
+
+logic s_axi_aready_reg = 1'b0, s_axi_aready_next;
+
+logic [3:0] m_axi_aregion_reg = 4'd0, m_axi_aregion_next;
+logic [SEL_W-1:0] m_select_reg = '0, m_select_next;
+logic m_axi_avalid_reg = 1'b0, m_axi_avalid_next;
+logic m_decerr_reg = 1'b0, m_decerr_next;
+logic m_wc_valid_reg = 1'b0, m_wc_valid_next;
+logic m_rc_valid_reg = 1'b0, m_rc_valid_next;
+
+assign s_axi_aready = s_axi_aready_reg;
+
+assign m_axi_aregion = m_axi_aregion_reg;
+assign m_select = m_select_reg;
+assign m_axi_avalid = m_axi_avalid_reg;
+
+assign m_wc_select = m_select_reg;
+assign m_wc_decerr = m_decerr_reg;
+assign m_wc_valid = m_wc_valid_reg;
+
+assign m_rc_decerr = m_decerr_reg;
+assign m_rc_valid = m_rc_valid_reg;
+
+logic match;
+logic trans_start;
+logic trans_complete;
+
+localparam TR_CNT_W = $clog2(S_ACCEPT+1);
+logic [TR_CNT_W-1:0] trans_count_reg = 0;
+wire trans_limit = trans_count_reg >= TR_CNT_W'(S_ACCEPT) && !trans_complete;
+
+// transfer ID thread tracking: per slot, the ID, destination, region and outstanding count
+logic [ID_W-1:0] thread_id_reg[S_INT_THREADS-1:0];
+logic [SEL_W-1:0] thread_m_reg[S_INT_THREADS-1:0];
+logic [3:0] thread_region_reg[S_INT_THREADS-1:0];
+logic [$clog2(S_ACCEPT+1)-1:0] thread_count_reg[S_INT_THREADS-1:0];
+
+// TODO fix loop
+/* verilator lint_off UNOPTFLAT */
+wire [S_INT_THREADS-1:0] thread_active;
+wire [S_INT_THREADS-1:0] thread_match;
+wire [S_INT_THREADS-1:0] thread_match_dest;
+wire [S_INT_THREADS-1:0] thread_cpl_match;
+wire [S_INT_THREADS-1:0] thread_trans_start;
+wire [S_INT_THREADS-1:0] thread_trans_complete;
+
+for (genvar n = 0; n < S_INT_THREADS; n = n + 1) begin
+    initial begin
+        thread_count_reg[n] = '0;
+    end
+
+    assign thread_active[n] = thread_count_reg[n] != 0;
+    assign thread_match[n] = thread_active[n] && thread_id_reg[n] == s_axi_aid;
+    assign thread_match_dest[n] = thread_match[n] && thread_m_reg[n] == m_select_next && (M_REGIONS < 2 || thread_region_reg[n] == m_axi_aregion_next);
+    assign thread_cpl_match[n] = thread_active[n] && thread_id_reg[n] == s_cpl_id;
+    assign thread_trans_start[n] = (thread_match[n] || (!thread_active[n] && thread_match == 0 && (thread_trans_start & ({S_INT_THREADS{1'b1}} >> (S_INT_THREADS-n))) == 0)) && trans_start;
+    assign thread_trans_complete[n] = thread_cpl_match[n] && trans_complete;
+
+    always_ff @(posedge clk) begin
+        if (thread_trans_start[n]) begin
+            thread_id_reg[n] <= s_axi_aid;
+            thread_m_reg[n] <= m_select_next;
+            thread_region_reg[n] <= m_axi_aregion_next;
+        end
+
+        if (thread_trans_start[n] && !thread_trans_complete[n]) begin
+            thread_count_reg[n] <= thread_count_reg[n] + 1;
+        end else if (!thread_trans_start[n] && thread_trans_complete[n]) begin
+            thread_count_reg[n] <= thread_count_reg[n] - 1;
+        end
+
+        if (rst) begin
+            thread_count_reg[n] <= 0;
+        end
+    end
+end
+
+always_comb begin
+    state_next = STATE_IDLE;
+
+    match = 1'b0;
+    trans_start = 1'b0;
+    trans_complete = 1'b0;
+
+    s_axi_aready_next = 1'b0;
+
+    m_axi_aregion_next = m_axi_aregion_reg;
+    m_select_next = m_select_reg;
+    m_axi_avalid_next = m_axi_avalid_reg && !m_axi_aready;
+    m_decerr_next = m_decerr_reg;
+    m_wc_valid_next = m_wc_valid_reg && !m_wc_ready;
+    m_rc_valid_next = m_rc_valid_reg && !m_rc_ready;
+
+    case (state_reg)
+        STATE_IDLE: begin
+            // idle state: decode a pending request and pick destination and region
+            s_axi_aready_next = 1'b0;
+
+            if (s_axi_avalid && !s_axi_aready) begin
+                match = 1'b0;
+                for (integer i = 0; i < M_COUNT; i = i + 1) begin
+                    for (integer j = 0; j < M_REGIONS; j = j + 1) begin
+                        if (M_ADDR_W_INT[i*M_REGIONS+j] != 0 && (!M_SECURE_INT[i] || !s_axi_aprot[1]) && M_CONNECT_INT[i][S] && (s_axi_aaddr >> M_ADDR_W_INT[i*M_REGIONS+j]) == (M_BASE_ADDR_INT[i*M_REGIONS+j] >> M_ADDR_W_INT[i*M_REGIONS+j])) begin
+                            m_select_next = SEL_W'(i);
+                            m_axi_aregion_next = 4'(j);
+                            match = 1'b1;
+                        end
+                    end
+                end
+
+                if (match) begin
+                    // address decode successful
+                    if (!trans_limit && (thread_match_dest != 0 || (!(&thread_active) && thread_match == 0))) begin
+                        // transaction limit not reached
+                        m_axi_avalid_next = 1'b1;
+                        m_decerr_next = 1'b0;
+                        m_wc_valid_next = WC_OUTPUT;
+                        m_rc_valid_next = 1'b0;
+                        trans_start = 1'b1;
+                        state_next = STATE_DECODE;
+                    end else begin
+                        // transaction limit reached; block in idle
+                        state_next = STATE_IDLE;
+                    end
+                end else begin
+                    // decode error (not counted: trans_start stays low on this path)
+                    m_axi_avalid_next = 1'b0;
+                    m_decerr_next = 1'b1;
+                    m_wc_valid_next = WC_OUTPUT;
+                    m_rc_valid_next = 1'b1;
+                    state_next = STATE_DECODE;
+                end
+            end else begin
+                state_next = STATE_IDLE;
+            end
+        end
+        STATE_DECODE: begin
+            if (!m_axi_avalid_next && (!m_wc_valid_next || !WC_OUTPUT) && !m_rc_valid_next) begin
+                s_axi_aready_next = 1'b1;
+                state_next = STATE_IDLE;
+            end else begin
+                state_next = STATE_DECODE;
+            end
+        end
+    endcase
+
+    // manage completions (each completion releases one slot of the accept count)
+    trans_complete = s_cpl_valid;
+end
+
+always_ff @(posedge clk) begin
+    state_reg <= state_next;
+    s_axi_aready_reg <= s_axi_aready_next;
+    m_axi_avalid_reg <= m_axi_avalid_next;
+    m_wc_valid_reg <= m_wc_valid_next;
+    m_rc_valid_reg <= m_rc_valid_next;
+
+    if (trans_start && !trans_complete) begin
+        trans_count_reg <= trans_count_reg + 1;
+    end else if (!trans_start && trans_complete) begin
+        trans_count_reg <= trans_count_reg - 1;
+    end
+
+    m_axi_aregion_reg <= m_axi_aregion_next;
+    m_select_reg <= m_select_next;
+    m_decerr_reg <= m_decerr_next;
+
+    if (rst) begin
+        state_reg <= STATE_IDLE;
+        s_axi_aready_reg <= 1'b0;
+        m_axi_avalid_reg <= 1'b0;
+        m_wc_valid_reg <= 1'b0;
+        m_rc_valid_reg <= 1'b0;
+
+        trans_count_reg <= 0;
+    end
+end
+
+endmodule
+
+`resetall
diff --git a/src/axi/rtl/taxi_axi_crossbar_rd.f b/src/axi/rtl/taxi_axi_crossbar_rd.f
new file mode 100644
index 0000000..3aee78a
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar_rd.f
@@ -0,0 +1,6 @@
+taxi_axi_crossbar_rd.sv
+taxi_axi_crossbar_addr.sv
+taxi_axi_register_rd.sv
+taxi_axi_if.sv
+../lib/taxi/src/prim/rtl/taxi_arbiter.sv
+../lib/taxi/src/prim/rtl/taxi_penc.sv
diff --git a/src/axi/rtl/taxi_axi_crossbar_rd.sv b/src/axi/rtl/taxi_axi_crossbar_rd.sv
new file mode 100644
index 0000000..4a59891
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar_rd.sv
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2018-2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI4 crossbar (read)
+ */
+module taxi_axi_crossbar_rd #
+(
+    // Number of AXI inputs (slave interfaces)
+    parameter S_COUNT = 4,
+    // Number of AXI outputs (master interfaces)
+    parameter M_COUNT = 4,
+    // Address width in bits for address decoding
+    parameter ADDR_W = 32,
+    // TODO fix parametrization once verilator issue 5890 is fixed
+    // Number of concurrent unique IDs for each slave interface
+    // S_COUNT concatenated fields of 32 bits
+    parameter S_THREADS = {S_COUNT{32'd2}},
+    // Number of concurrent operations for each slave interface
+    // S_COUNT concatenated fields of 32 bits
+    parameter S_ACCEPT = {S_COUNT{32'd16}},
+    // Number of regions per master interface
+    parameter M_REGIONS = 1,
+    // Master interface base addresses
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_W bits
+    // set to zero for default addressing based on M_ADDR_W
+    parameter M_BASE_ADDR = '0,
+    // Master interface address widths
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits
+    parameter M_ADDR_W = {M_COUNT{{M_REGIONS{32'd24}}}},
+    // Read connections between interfaces
+    // M_COUNT concatenated fields of S_COUNT bits
+    parameter M_CONNECT = {M_COUNT{{S_COUNT{1'b1}}}},
+    // Number of concurrent operations for each master interface
+    // M_COUNT concatenated fields of 32 bits
+    parameter M_ISSUE = {M_COUNT{32'd4}},
+    // Secure master (fail operations based on awprot/arprot)
+    // M_COUNT bits
+    parameter M_SECURE = {M_COUNT{1'b0}},
+    // Slave interface AR channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_AR_REG_TYPE = {S_COUNT{2'd0}},
+    // Slave interface R channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_R_REG_TYPE = {S_COUNT{2'd2}},
+    // Master interface AR channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_AR_REG_TYPE = {M_COUNT{2'd1}},
+    // Master interface R channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_R_REG_TYPE = {M_COUNT{2'd0}}
+)
+(
+    input wire logic clk,
+    input wire logic rst,
+
+    /*
+     * AXI4 slave interfaces
+     */
+    taxi_axi_if.rd_slv s_axi_rd[S_COUNT],
+
+    /*
+     * AXI4 master interfaces
+     */
+    taxi_axi_if.rd_mst m_axi_rd[M_COUNT]
+);
+
+// extract parameters (taken from interface index 0 on each side)
+localparam DATA_W = s_axi_rd[0].DATA_W;
+localparam S_ADDR_W = s_axi_rd[0].ADDR_W;
+localparam STRB_W = s_axi_rd[0].STRB_W;
+localparam S_ID_W = s_axi_rd[0].ID_W;
+localparam M_ID_W = m_axi_rd[0].ID_W;
+localparam logic ARUSER_EN = s_axi_rd[0].ARUSER_EN && m_axi_rd[0].ARUSER_EN;
+localparam ARUSER_W = s_axi_rd[0].ARUSER_W;
+localparam logic RUSER_EN = s_axi_rd[0].RUSER_EN && m_axi_rd[0].RUSER_EN;
+localparam RUSER_W = s_axi_rd[0].RUSER_W;
+
+localparam CL_S_COUNT = $clog2(S_COUNT);
+localparam CL_M_COUNT = $clog2(M_COUNT);
+localparam CL_S_COUNT_INT = CL_S_COUNT > 0 ? CL_S_COUNT : 1;
+localparam CL_M_COUNT_INT = CL_M_COUNT > 0 ? CL_M_COUNT : 1;
+localparam M_COUNT_P1 = M_COUNT+1;
+localparam CL_M_COUNT_P1 = $clog2(M_COUNT_P1);
+
+localparam [S_COUNT-1:0][31:0] S_THREADS_INT = S_THREADS;
+localparam [S_COUNT-1:0][31:0] S_ACCEPT_INT = S_ACCEPT;
+localparam [M_COUNT-1:0][31:0] M_ISSUE_INT = M_ISSUE;
+
+// check configuration
+if (s_axi_rd[0].ADDR_W != ADDR_W)
+    $fatal(0, "Error: Interface ADDR_W parameter mismatch (instance %m)");
+
+if (m_axi_rd[0].DATA_W != DATA_W)
+    $fatal(0, "Error: Interface DATA_W parameter mismatch (instance %m)");
+
+if (m_axi_rd[0].STRB_W != STRB_W)
+    $fatal(0, "Error: Interface STRB_W parameter mismatch (instance %m)");
+
+if (M_ID_W < S_ID_W+$clog2(S_COUNT))
+    $fatal(0, "Error: M_ID_W must be at least $clog2(S_COUNT) larger than S_ID_W (instance %m)");
+
+wire [S_ID_W-1:0] int_s_axi_arid[S_COUNT];
+wire [ADDR_W-1:0] int_s_axi_araddr[S_COUNT];
+wire [7:0] int_s_axi_arlen[S_COUNT];
+wire [2:0] int_s_axi_arsize[S_COUNT];
+wire [1:0] int_s_axi_arburst[S_COUNT];
+wire int_s_axi_arlock[S_COUNT];
+wire [3:0] int_s_axi_arcache[S_COUNT];
+wire [2:0] int_s_axi_arprot[S_COUNT];
+wire [3:0] int_s_axi_arqos[S_COUNT];
+wire [3:0] int_s_axi_arregion[S_COUNT];
+wire [ARUSER_W-1:0] int_s_axi_aruser[S_COUNT];
+
+logic [M_COUNT-1:0] int_axi_arvalid[S_COUNT];
+logic [S_COUNT-1:0] int_axi_arready[M_COUNT];
+
+wire [M_ID_W-1:0] int_m_axi_rid[M_COUNT];
+wire [DATA_W-1:0] int_m_axi_rdata[M_COUNT];
+wire [1:0] int_m_axi_rresp[M_COUNT];
+wire int_m_axi_rlast[M_COUNT];
+wire [RUSER_W-1:0] int_m_axi_ruser[M_COUNT];
+
+logic [S_COUNT-1:0] int_axi_rvalid[M_COUNT];
+logic [M_COUNT-1:0] int_axi_rready[S_COUNT];
+
+for (genvar m = 0; m < S_COUNT; m = m + 1) begin : s_ifaces
+
+    taxi_axi_if #(
+        .DATA_W(s_axi_rd[0].DATA_W),
+        .ADDR_W(s_axi_rd[0].ADDR_W),
+        .STRB_W(s_axi_rd[0].STRB_W),
+        .ID_W(s_axi_rd[0].ID_W),
+        .ARUSER_EN(s_axi_rd[0].ARUSER_EN),
+        .ARUSER_W(s_axi_rd[0].ARUSER_W),
+        .RUSER_EN(s_axi_rd[0].RUSER_EN),
+        .RUSER_W(s_axi_rd[0].RUSER_W)
+    ) int_axi();
+
+    // S side register
+    taxi_axi_register_rd #(
+        .AR_REG_TYPE(S_AR_REG_TYPE[m*2 +: 2]),
+        .R_REG_TYPE(S_R_REG_TYPE[m*2 +: 2])
+    )
+    reg_inst (
+        .clk(clk),
+        .rst(rst),
+
+        /*
+         * AXI4 slave interface
+         */
+        .s_axi_rd(s_axi_rd[m]),
+
+        /*
+         * AXI4 master interface
+         */
+        .m_axi_rd(int_axi)
+    );
+
+    // address decode and admission control
+    wire [CL_M_COUNT_INT-1:0] a_select;
+
+    wire m_axi_avalid;
+    wire m_axi_aready;
+
+    wire m_rc_decerr;
+    wire m_rc_valid;
+    wire m_rc_ready;
+
+    wire [S_ID_W-1:0] s_cpl_id;
+    wire s_cpl_valid;
+
+    taxi_axi_crossbar_addr #(
+        .S(m),
+        .S_COUNT(S_COUNT),
+        .M_COUNT(M_COUNT),
+        .SEL_W(CL_M_COUNT_INT),
+        .ADDR_W(ADDR_W),
+        .ID_W(S_ID_W),
+        .S_THREADS(S_THREADS_INT[m]),
+        .S_ACCEPT(S_ACCEPT_INT[m]),
+        .M_REGIONS(M_REGIONS),
+        .M_BASE_ADDR(M_BASE_ADDR),
+        .M_ADDR_W(M_ADDR_W),
+        .M_CONNECT(M_CONNECT),
+        .M_SECURE(M_SECURE),
+        .WC_OUTPUT(0)
+    )
+    addr_inst (
+        .clk(clk),
+        .rst(rst),
+
+        /*
+         * Address input
+         */
+        .s_axi_aid(int_axi.arid),
+        .s_axi_aaddr(int_axi.araddr),
+        .s_axi_aprot(int_axi.arprot),
+        .s_axi_aqos(int_axi.arqos),
+        .s_axi_avalid(int_axi.arvalid),
+        .s_axi_aready(int_axi.arready),
+
+        /*
+         * Address output
+         */
+        .m_axi_aregion(int_s_axi_arregion[m]),
+        .m_select(a_select),
+        .m_axi_avalid(m_axi_avalid),
+        .m_axi_aready(m_axi_aready),
+
+        /*
+         * Write command output
+         */
+        .m_wc_select(),
+        .m_wc_decerr(),
+        .m_wc_valid(),
+        .m_wc_ready(1'b1),
+
+        /*
+         * Response command output
+         */
+        .m_rc_decerr(m_rc_decerr),
+        .m_rc_valid(m_rc_valid),
+        .m_rc_ready(m_rc_ready),
+
+        /*
+         * Completion input
+         */
+        .s_cpl_id(s_cpl_id),
+        .s_cpl_valid(s_cpl_valid)
+    );
+
+    assign int_s_axi_arid[m] = int_axi.arid;
+    assign int_s_axi_araddr[m] = int_axi.araddr;
+    assign int_s_axi_arlen[m] = int_axi.arlen;
+    assign int_s_axi_arsize[m] = int_axi.arsize;
+    assign int_s_axi_arburst[m] = int_axi.arburst;
+    assign int_s_axi_arlock[m] = int_axi.arlock;
+    assign int_s_axi_arcache[m] = int_axi.arcache;
+    assign int_s_axi_arprot[m] = int_axi.arprot;
+    assign int_s_axi_arqos[m] = int_axi.arqos;
+    assign int_s_axi_aruser[m] = int_axi.aruser;
+
+    always_comb begin
+        int_axi_arvalid[m] = '0;
+        int_axi_arvalid[m][a_select] = m_axi_avalid;
+    end
+    assign m_axi_aready = int_axi_arready[a_select][m];
+
+    // decode error handling: return arlen+1 beats with rresp 2'b11 for unmatched addresses
+    logic [S_ID_W-1:0] decerr_m_axi_rid_reg = '0, decerr_m_axi_rid_next;
+    logic decerr_m_axi_rlast_reg = 1'b0, decerr_m_axi_rlast_next;
+    logic decerr_m_axi_rvalid_reg = 1'b0, decerr_m_axi_rvalid_next;
+    wire decerr_m_axi_rready;
+
+    logic [7:0] decerr_len_reg = 8'd0, decerr_len_next;
+
+    assign m_rc_ready = !decerr_m_axi_rvalid_reg;
+
+    always_comb begin
+        decerr_len_next = decerr_len_reg;
+        decerr_m_axi_rid_next = decerr_m_axi_rid_reg;
+        decerr_m_axi_rlast_next = decerr_m_axi_rlast_reg;
+        decerr_m_axi_rvalid_next = decerr_m_axi_rvalid_reg;
+
+        if (decerr_m_axi_rvalid_reg) begin
+            if (decerr_m_axi_rready) begin
+                if (decerr_len_reg != 0) begin
+                    decerr_len_next = decerr_len_reg-1;
+                    decerr_m_axi_rlast_next = (decerr_len_next == 0);
+                    decerr_m_axi_rvalid_next = 1'b1;
+                end else begin
+                    decerr_m_axi_rvalid_next = 1'b0;
+                end
+            end
+        end else if (m_rc_valid && m_rc_ready) begin
+            decerr_len_next = int_axi.arlen;
+            decerr_m_axi_rid_next = int_axi.arid;
+            decerr_m_axi_rlast_next = (decerr_len_next == 0);
+            decerr_m_axi_rvalid_next = 1'b1;
+        end
+    end
+
+    always_ff @(posedge clk) begin
+        decerr_m_axi_rvalid_reg <= decerr_m_axi_rvalid_next;
+        decerr_m_axi_rid_reg <= decerr_m_axi_rid_next;
+        decerr_m_axi_rlast_reg <= decerr_m_axi_rlast_next;
+        decerr_len_reg <= decerr_len_next;
+
+        if (rst) begin
+            decerr_m_axi_rvalid_reg <= 1'b0;
+        end
+    end
+
+    // read response arbitration (request M_COUNT is the local decode error source)
+    wire [M_COUNT_P1-1:0] r_req;
+    wire [M_COUNT_P1-1:0] r_ack;
+    wire [M_COUNT_P1-1:0] r_grant;
+    wire r_grant_valid;
+    wire [CL_M_COUNT_P1-1:0] r_grant_index;
+
+    taxi_arbiter #(
+        .PORTS(M_COUNT_P1),
+        .ARB_ROUND_ROBIN(1),
+        .ARB_BLOCK(1),
+        .ARB_BLOCK_ACK(1),
+        .LSB_HIGH_PRIO(1)
+    )
+    r_arb_inst (
+        .clk(clk),
+        .rst(rst),
+        .req(r_req),
+        .ack(r_ack),
+        .grant(r_grant),
+        .grant_valid(r_grant_valid),
+        .grant_index(r_grant_index)
+    );
+
+    // read response mux
+    always_comb begin
+        if (r_grant_index == CL_M_COUNT_P1'(M_COUNT_P1-1)) begin
+            int_axi.rid = decerr_m_axi_rid_reg;
+            int_axi.rdata = '0;
+            int_axi.rresp = 2'b11;
+            int_axi.rlast = decerr_m_axi_rlast_reg;
+            int_axi.ruser = '0;
+            int_axi.rvalid = decerr_m_axi_rvalid_reg & r_grant_valid;
+        end else begin
+            int_axi.rid = S_ID_W'(int_m_axi_rid[r_grant_index[CL_M_COUNT_INT-1:0]]);
+            int_axi.rdata = int_m_axi_rdata[r_grant_index[CL_M_COUNT_INT-1:0]];
+            int_axi.rresp = int_m_axi_rresp[r_grant_index[CL_M_COUNT_INT-1:0]];
+            int_axi.rlast = int_m_axi_rlast[r_grant_index[CL_M_COUNT_INT-1:0]];
+            int_axi.ruser = int_m_axi_ruser[r_grant_index[CL_M_COUNT_INT-1:0]];
+            int_axi.rvalid = int_axi_rvalid[r_grant_index[CL_M_COUNT_INT-1:0]][m] & r_grant_valid;
+        end
+    end
+
+    always_comb begin
+        int_axi_rready[m] = '0;
+        int_axi_rready[m][r_grant_index[CL_M_COUNT_INT-1:0]] = r_grant_valid && int_axi.rready;
+    end
+
+    assign decerr_m_axi_rready = (r_grant_valid && int_axi.rready) && (r_grant_index == CL_M_COUNT_P1'(M_COUNT_P1-1));
+
+    for (genvar n = 0; n < M_COUNT; n = n + 1) begin
+        assign r_req[n] = int_axi_rvalid[n][m] && !r_grant[n];
+        assign r_ack[n] = r_grant_valid && int_axi_rvalid[n][m] && int_axi.rlast && int_axi.rready;
+    end
+
+    assign r_req[M_COUNT_P1-1] = decerr_m_axi_rvalid_reg && !r_grant[M_COUNT_P1-1];
+    assign r_ack[M_COUNT_P1-1] = r_grant_valid && decerr_m_axi_rvalid_reg && decerr_m_axi_rlast_reg && int_axi.rready;
+
+    assign s_cpl_id = int_axi.rid;
+    // decode error responses were never counted by addr_inst (no trans_start on
+    // that path), so they must not be reported back as completions
+    assign s_cpl_valid = int_axi.rvalid && int_axi.rready && int_axi.rlast
+        && r_grant_index != CL_M_COUNT_P1'(M_COUNT_P1-1);
+
+end // s_ifaces
+
+for (genvar n = 0; n < M_COUNT; n = n + 1) begin : m_ifaces
+
+    taxi_axi_if #(
+        .DATA_W(m_axi_rd[0].DATA_W),
+        .ADDR_W(m_axi_rd[0].ADDR_W),
+        .STRB_W(m_axi_rd[0].STRB_W),
+        .ID_W(m_axi_rd[0].ID_W),
+        .ARUSER_EN(m_axi_rd[0].ARUSER_EN),
+        .ARUSER_W(m_axi_rd[0].ARUSER_W),
+        .RUSER_EN(m_axi_rd[0].RUSER_EN),
+        .RUSER_W(m_axi_rd[0].RUSER_W)
+    ) int_axi();
+
+    // in-flight transaction count
+    wire trans_start;
+    wire trans_complete;
+    localparam TR_CNT_W = $clog2(M_ISSUE_INT[n]+1);
+    logic [TR_CNT_W-1:0] trans_count_reg = '0;
+
+    wire trans_limit = trans_count_reg >= TR_CNT_W'(M_ISSUE_INT[n]) && !trans_complete;
+
+    always_ff @(posedge clk) begin
+        if (rst) begin
+            trans_count_reg <= 0;
+        end else begin
+            if (trans_start && !trans_complete) begin
+                trans_count_reg <= trans_count_reg + 1;
+            end else if (!trans_start && trans_complete) begin
+                trans_count_reg <= trans_count_reg - 1;
+            end
+        end
+    end
+
+    // address arbitration
+    wire [S_COUNT-1:0] a_req;
+    wire [S_COUNT-1:0] a_ack;
+    wire [S_COUNT-1:0] a_grant;
+    wire a_grant_valid;
+    wire [CL_S_COUNT_INT-1:0] a_grant_index;
+
+    if (S_COUNT > 1) begin : arb
+
+        taxi_arbiter #(
+            .PORTS(S_COUNT),
+            .ARB_ROUND_ROBIN(1),
+            .ARB_BLOCK(1),
+            .ARB_BLOCK_ACK(1),
+            .LSB_HIGH_PRIO(1)
+        )
+        a_arb_inst (
+            .clk(clk),
+            .rst(rst),
+            .req(a_req),
+            .ack(a_ack),
+            .grant(a_grant),
+            .grant_valid(a_grant_valid),
+            .grant_index(a_grant_index)
+        );
+
+    end else begin
+
+        logic grant_valid_reg = 1'b0;
+
+        always @(posedge clk) begin
+            if (a_req) begin
+                grant_valid_reg <= 1'b1;
+            end
+
+            if (a_ack || rst) begin
+                grant_valid_reg <= 1'b0;
+            end
+        end
+
+        assign a_grant_valid = grant_valid_reg;
+        assign a_grant = grant_valid_reg;
+        assign a_grant_index = '0;
+
+    end
+
+    // address mux (grant index is prepended to the ID when S_COUNT > 1)
+    if (S_COUNT > 1) begin
+        assign int_axi.arid = {a_grant_index, int_s_axi_arid[a_grant_index]};
+    end else begin
+        assign int_axi.arid = int_s_axi_arid[a_grant_index];
+    end
+    assign int_axi.araddr = int_s_axi_araddr[a_grant_index];
+    assign int_axi.arlen = int_s_axi_arlen[a_grant_index];
+    assign int_axi.arsize = int_s_axi_arsize[a_grant_index];
+    assign int_axi.arburst = int_s_axi_arburst[a_grant_index];
+    assign int_axi.arlock = int_s_axi_arlock[a_grant_index];
+    assign int_axi.arcache = int_s_axi_arcache[a_grant_index];
+    assign int_axi.arprot = int_s_axi_arprot[a_grant_index];
+    assign int_axi.arqos = int_s_axi_arqos[a_grant_index];
+    assign int_axi.arregion = int_s_axi_arregion[a_grant_index];
+    assign int_axi.aruser = int_s_axi_aruser[a_grant_index];
+    assign int_axi.arvalid = int_axi_arvalid[a_grant_index][n] && a_grant_valid;
+
+    always_comb begin
+        int_axi_arready[n] = '0;
+        int_axi_arready[n][a_grant_index] = a_grant_valid && int_axi.arready;
+    end
+
+    for (genvar m = 0; m < S_COUNT; m = m + 1) begin
+        assign a_req[m] = int_axi_arvalid[m][n] && !a_grant_valid && !trans_limit;
+        assign a_ack[m] = a_grant[m] && int_axi_arvalid[m][n] && int_axi.arready;
+    end
+
+    assign trans_start = int_axi.arvalid && int_axi.arready && a_grant_valid;
+
+    // read response forwarding (target slave = ID bits above S_ID_W)
+    wire [CL_S_COUNT_INT-1:0] r_select = CL_S_COUNT_INT'(int_axi.rid >> S_ID_W);
+
+    assign int_m_axi_rid[n] = int_axi.rid;
+    assign int_m_axi_rdata[n] = int_axi.rdata;
+    assign int_m_axi_rresp[n] = int_axi.rresp;
+    assign int_m_axi_rlast[n] = int_axi.rlast;
+    assign int_m_axi_ruser[n] = int_axi.ruser;
+
+    always_comb begin
+        int_axi_rvalid[n] = '0;
+        int_axi_rvalid[n][r_select] = int_axi.rvalid;
+    end
+    assign int_axi.rready = int_axi_rready[r_select][n];
+
+    assign trans_complete = int_axi.rvalid && int_axi.rready && int_axi.rlast;
+
+    // M side register
+    taxi_axi_register_rd #(
+        .AR_REG_TYPE(M_AR_REG_TYPE[n*2 +: 2]),
+        .R_REG_TYPE(M_R_REG_TYPE[n*2 +: 2])
+    )
+    reg_inst (
+        .clk(clk),
+        .rst(rst),
+
+        /*
+         * AXI4 slave interface
+         */
+        .s_axi_rd(int_axi),
+
+        /*
+         * AXI4 master interface
+         */
+        .m_axi_rd(m_axi_rd[n])
+    );
+
+end // m_ifaces
+
+endmodule
+
+`resetall
diff --git a/src/axi/rtl/taxi_axi_crossbar_wr.f b/src/axi/rtl/taxi_axi_crossbar_wr.f
new file mode 100644
index 0000000..c03501a
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar_wr.f
@@ -0,0 +1,6
@@
+taxi_axi_crossbar_wr.sv
+taxi_axi_crossbar_addr.sv
+taxi_axi_register_wr.sv
+taxi_axi_if.sv
+../lib/taxi/src/prim/rtl/taxi_arbiter.sv
+../lib/taxi/src/prim/rtl/taxi_penc.sv
diff --git a/src/axi/rtl/taxi_axi_crossbar_wr.sv b/src/axi/rtl/taxi_axi_crossbar_wr.sv
new file mode 100644
index 0000000..b4a7163
--- /dev/null
+++ b/src/axi/rtl/taxi_axi_crossbar_wr.sv
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2018-2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI4 crossbar (write)
+ */
+module taxi_axi_crossbar_wr #
+(
+    // Number of AXI inputs (slave interfaces)
+    parameter S_COUNT = 4,
+    // Number of AXI outputs (master interfaces)
+    parameter M_COUNT = 4,
+    // Address width in bits for address decoding
+    parameter ADDR_W = 32,
+    // TODO fix parametrization once verilator issue 5890 is fixed
+    // Number of concurrent unique IDs for each slave interface
+    // S_COUNT concatenated fields of 32 bits
+    parameter S_THREADS = {S_COUNT{32'd2}},
+    // Number of concurrent operations for each slave interface
+    // S_COUNT concatenated fields of 32 bits
+    parameter S_ACCEPT = {S_COUNT{32'd16}},
+    // Number of regions per master interface
+    parameter M_REGIONS = 1,
+    // Master interface base addresses
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_W bits
+    // set to zero for default addressing based on M_ADDR_W
+    parameter M_BASE_ADDR = '0,
+    // Master interface address widths
+    // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits
+    parameter M_ADDR_W = {M_COUNT{{M_REGIONS{32'd24}}}},
+    // Write connections between interfaces
+    // M_COUNT concatenated fields of S_COUNT bits
+    parameter M_CONNECT = {M_COUNT{{S_COUNT{1'b1}}}},
+    // Number of concurrent operations for each master interface
+    // M_COUNT concatenated fields of 32 bits
+    parameter M_ISSUE = {M_COUNT{32'd4}},
+    // Secure master (fail operations based on awprot/arprot)
+    // M_COUNT bits
+    parameter M_SECURE = {M_COUNT{1'b0}},
+    // Slave interface AW channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_AW_REG_TYPE = {S_COUNT{2'd0}},
+    // Slave interface W channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_W_REG_TYPE = {S_COUNT{2'd0}},
+    // Slave interface B channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter S_B_REG_TYPE = {S_COUNT{2'd1}},
+    // Master interface AW channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_AW_REG_TYPE = {M_COUNT{2'd1}},
+    // Master interface W channel register type (output)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_W_REG_TYPE = {M_COUNT{2'd2}},
+    // Master interface B channel register type (input)
+    // 0 to bypass, 1 for simple buffer, 2 for skid buffer
+    parameter M_B_REG_TYPE = {M_COUNT{2'd0}}
+)
+(
+    input  wire logic    clk,
+    input  wire logic    rst,
+
+    /*
+     * AXI4 slave interfaces
+     */
+    taxi_axi_if.wr_slv   s_axi_wr[S_COUNT],
+
+    /*
+     * AXI4 master interfaces
+     */
+    taxi_axi_if.wr_mst   m_axi_wr[M_COUNT]
+);
+
+// extract parameters
+localparam DATA_W = s_axi_wr[0].DATA_W;
+localparam S_ADDR_W = s_axi_wr[0].ADDR_W;
+localparam STRB_W = s_axi_wr[0].STRB_W;
+localparam S_ID_W = s_axi_wr[0].ID_W;
+localparam M_ID_W = m_axi_wr[0].ID_W;
+localparam logic AWUSER_EN = s_axi_wr[0].AWUSER_EN && m_axi_wr[0].AWUSER_EN;
+localparam AWUSER_W = s_axi_wr[0].AWUSER_W;
+localparam logic WUSER_EN = s_axi_wr[0].WUSER_EN && m_axi_wr[0].WUSER_EN;
+localparam WUSER_W = s_axi_wr[0].WUSER_W;
+localparam logic BUSER_EN = s_axi_wr[0].BUSER_EN && m_axi_wr[0].BUSER_EN;
+localparam BUSER_W = s_axi_wr[0].BUSER_W;
+
+localparam CL_S_COUNT = $clog2(S_COUNT);
+localparam CL_M_COUNT = $clog2(M_COUNT);
+localparam CL_S_COUNT_INT = CL_S_COUNT > 0 ? CL_S_COUNT : 1;
+localparam CL_M_COUNT_INT = CL_M_COUNT > 0 ? CL_M_COUNT : 1;
+localparam M_COUNT_P1 = M_COUNT+1;
+localparam CL_M_COUNT_P1 = $clog2(M_COUNT_P1);
+
+localparam [S_COUNT-1:0][31:0] S_THREADS_INT = S_THREADS;
+localparam [S_COUNT-1:0][31:0] S_ACCEPT_INT = S_ACCEPT;
+localparam [M_COUNT-1:0][31:0] M_ISSUE_INT = M_ISSUE;
+
+// check configuration
+if (s_axi_wr[0].ADDR_W != ADDR_W)
+    $fatal(0, "Error: Interface ADDR_W parameter mismatch (instance %m)");
+
+if (m_axi_wr[0].DATA_W != DATA_W)
+    $fatal(0, "Error: Interface DATA_W parameter mismatch (instance %m)");
+
+if (m_axi_wr[0].STRB_W != STRB_W)
+    $fatal(0, "Error: Interface STRB_W parameter mismatch (instance %m)");
+
+if (M_ID_W < S_ID_W+$clog2(S_COUNT))
+    $fatal(0, "Error: M_ID_W must be at least $clog2(S_COUNT) larger than S_ID_W (instance %m)");
+
+wire [S_ID_W-1:0]    int_s_axi_awid[S_COUNT];
+wire [ADDR_W-1:0]    int_s_axi_awaddr[S_COUNT];
+wire [7:0]           int_s_axi_awlen[S_COUNT];
+wire [2:0]           int_s_axi_awsize[S_COUNT];
+wire [1:0]           int_s_axi_awburst[S_COUNT];
+wire                 int_s_axi_awlock[S_COUNT];
+wire [3:0]           int_s_axi_awcache[S_COUNT];
+wire [2:0]           int_s_axi_awprot[S_COUNT];
+wire [3:0]           int_s_axi_awqos[S_COUNT];
+wire [3:0]           int_s_axi_awregion[S_COUNT];
+wire [AWUSER_W-1:0]  int_s_axi_awuser[S_COUNT];
+
+logic [M_COUNT-1:0]  int_axi_awvalid[S_COUNT];
+logic [S_COUNT-1:0]  int_axi_awready[M_COUNT];
+
+wire [DATA_W-1:0]    int_s_axi_wdata[S_COUNT];
+wire [STRB_W-1:0]    int_s_axi_wstrb[S_COUNT];
+wire                 int_s_axi_wlast[S_COUNT];
+wire [WUSER_W-1:0]   int_s_axi_wuser[S_COUNT];
+
+logic [M_COUNT-1:0]  int_axi_wvalid[S_COUNT];
+logic [S_COUNT-1:0]  int_axi_wready[M_COUNT];
+
+wire [M_ID_W-1:0]    int_m_axi_bid[M_COUNT];
+wire [1:0]           int_m_axi_bresp[M_COUNT];
+wire [BUSER_W-1:0]   int_m_axi_buser[M_COUNT];
+
+logic [S_COUNT-1:0]  int_axi_bvalid[M_COUNT];
+logic [M_COUNT-1:0]  int_axi_bready[S_COUNT];
+
+for (genvar m = 0; m < S_COUNT; m = m + 1) begin : s_ifaces
+
+    taxi_axi_if #(
+        .DATA_W(s_axi_wr[0].DATA_W),
+        .ADDR_W(s_axi_wr[0].ADDR_W),
+        .STRB_W(s_axi_wr[0].STRB_W),
+        .ID_W(s_axi_wr[0].ID_W),
+        .AWUSER_EN(s_axi_wr[0].AWUSER_EN),
+        .AWUSER_W(s_axi_wr[0].AWUSER_W),
+        .WUSER_EN(s_axi_wr[0].WUSER_EN),
+        .WUSER_W(s_axi_wr[0].WUSER_W),
+        .BUSER_EN(s_axi_wr[0].BUSER_EN),
+        .BUSER_W(s_axi_wr[0].BUSER_W)
+    ) int_axi();
+
+    // S side register
+    taxi_axi_register_wr #(
+        .AW_REG_TYPE(S_AW_REG_TYPE[m*2 +: 2]),
+        .W_REG_TYPE(S_W_REG_TYPE[m*2 +: 2]),
+        .B_REG_TYPE(S_B_REG_TYPE[m*2 +: 2])
+    )
+    reg_inst (
+        .clk(clk),
+        .rst(rst),
+
+        /*
+         * AXI4 slave interface
+         */
+        .s_axi_wr(s_axi_wr[m]),
+
+        /*
+         * AXI4 master interface
+         */
+        .m_axi_wr(int_axi)
+    );
+
+    // address decode and admission control
+    wire [CL_M_COUNT_INT-1:0] a_select;
+
+    wire m_axi_avalid;
+    wire m_axi_aready;
+
+    wire [CL_M_COUNT_INT-1:0] m_wc_select;
+    wire m_wc_decerr;
+    wire m_wc_valid;
+    wire m_wc_ready;
+
+    wire m_rc_decerr;
+    wire m_rc_valid;
+    wire m_rc_ready;
+
+    wire [S_ID_W-1:0] s_cpl_id;
+    wire s_cpl_valid;
+
+    taxi_axi_crossbar_addr #(
+        .S(m),
+        .S_COUNT(S_COUNT),
+        .M_COUNT(M_COUNT),
+        .SEL_W(CL_M_COUNT_INT),
+        .ADDR_W(ADDR_W),
+        .ID_W(S_ID_W),
+        .S_THREADS(S_THREADS_INT[m]),
+        .S_ACCEPT(S_ACCEPT_INT[m]),
+        .M_REGIONS(M_REGIONS),
+        .M_BASE_ADDR(M_BASE_ADDR),
+        .M_ADDR_W(M_ADDR_W),
+        .M_CONNECT(M_CONNECT),
+        .M_SECURE(M_SECURE),
+        .WC_OUTPUT(1)
+    )
+    addr_inst (
+        .clk(clk),
+        .rst(rst),
+
+        /*
+         * Address input
+         */
+        .s_axi_aid(int_axi.awid),
+        .s_axi_aaddr(int_axi.awaddr),
+        .s_axi_aprot(int_axi.awprot),
+        .s_axi_aqos(int_axi.awqos),
+        .s_axi_avalid(int_axi.awvalid),
+        .s_axi_aready(int_axi.awready),
+
+        /*
+         * Address output
+         */
+        .m_axi_aregion(int_s_axi_awregion[m]),
+        .m_select(a_select),
+        .m_axi_avalid(m_axi_avalid),
+        .m_axi_aready(m_axi_aready),
+
+        /*
+         * Write command output
+         */
+        .m_wc_select(m_wc_select),
+        .m_wc_decerr(m_wc_decerr),
+        .m_wc_valid(m_wc_valid),
+        .m_wc_ready(m_wc_ready),
+
+        /*
+         * Response command output
+         */
+        .m_rc_decerr(m_rc_decerr),
+        .m_rc_valid(m_rc_valid),
+        .m_rc_ready(m_rc_ready),
+
+        /*
+         * Completion input
+         */
+        .s_cpl_id(s_cpl_id),
+        .s_cpl_valid(s_cpl_valid)
+    );
+
+    assign int_s_axi_awid[m] = int_axi.awid;
+    assign int_s_axi_awaddr[m] = int_axi.awaddr;
+    assign int_s_axi_awlen[m] = int_axi.awlen;
+    assign int_s_axi_awsize[m] = int_axi.awsize;
+    assign int_s_axi_awburst[m] = int_axi.awburst;
+    assign int_s_axi_awlock[m] = int_axi.awlock;
+    assign int_s_axi_awcache[m] = int_axi.awcache;
+    assign int_s_axi_awprot[m] = int_axi.awprot;
+    assign int_s_axi_awqos[m] = int_axi.awqos;
+    assign int_s_axi_awuser[m] = int_axi.awuser;
+
+    always_comb begin
+        int_axi_awvalid[m] = '0;
+        int_axi_awvalid[m][a_select] = m_axi_avalid;
+    end
+    assign m_axi_aready = int_axi_awready[a_select][m];
+
+    // write command handling
+    logic [CL_M_COUNT_INT-1:0] w_select_reg = '0, w_select_next;
+    logic w_drop_reg = 1'b0, w_drop_next;
+    logic w_select_valid_reg = 1'b0, w_select_valid_next;
+
+    assign m_wc_ready = !w_select_valid_reg;
+
+    always_comb begin
+        w_select_next = w_select_reg;
+        w_drop_next = w_drop_reg && !(int_axi.wvalid && int_axi.wready && int_axi.wlast);
+        w_select_valid_next = w_select_valid_reg && !(int_axi.wvalid && int_axi.wready && int_axi.wlast);
+
+        if (m_wc_valid && !w_select_valid_reg) begin
+            w_select_next = m_wc_select;
+            w_drop_next = m_wc_decerr;
+            w_select_valid_next = m_wc_valid;
+        end
+    end
+
+    always_ff @(posedge clk) begin
+        w_select_valid_reg <= w_select_valid_next;
+        w_select_reg <= w_select_next;
+        w_drop_reg <= w_drop_next;
+
+        if (rst) begin
+            w_select_valid_reg <= 1'b0;
+            // w_drop_reg feeds wready directly; clear it so wready is not left forced high after reset
+            w_drop_reg <= 1'b0;
+        end
+    end
+
+    // write data forwarding
+    assign int_s_axi_wdata[m] = int_axi.wdata;
+    assign int_s_axi_wstrb[m] = int_axi.wstrb;
+    assign int_s_axi_wlast[m] = int_axi.wlast;
+    assign int_s_axi_wuser[m] = int_axi.wuser;
+
+    always_comb begin
+        int_axi_wvalid[m] = '0;
+        int_axi_wvalid[m][w_select_reg] = int_axi.wvalid && w_select_valid_reg && !w_drop_reg;
+    end
+    assign int_axi.wready = int_axi_wready[w_select_reg][m] || w_drop_reg;
+
+    // decode error handling
+    logic [S_ID_W-1:0] decerr_m_axi_bid_reg = '0, decerr_m_axi_bid_next;
+    logic decerr_m_axi_bvalid_reg = 1'b0, decerr_m_axi_bvalid_next;
+    wire decerr_m_axi_bready;
+
+    assign m_rc_ready = !decerr_m_axi_bvalid_reg;
+
+    always_comb begin
+        decerr_m_axi_bid_next = decerr_m_axi_bid_reg;
+        decerr_m_axi_bvalid_next = decerr_m_axi_bvalid_reg;
+
+        if (decerr_m_axi_bvalid_reg) begin
+            if (decerr_m_axi_bready) begin
+                decerr_m_axi_bvalid_next = 1'b0;
+            end
+        end else if (m_rc_valid && m_rc_ready) begin
+            decerr_m_axi_bid_next = int_s_axi_awid[m];
+            decerr_m_axi_bvalid_next = 1'b1;
+        end
+    end
+
+    always_ff @(posedge clk) begin
+        if (rst) begin
+            decerr_m_axi_bvalid_reg <= 1'b0;
+        end else begin
+            decerr_m_axi_bvalid_reg <= decerr_m_axi_bvalid_next;
+        end
+
+        decerr_m_axi_bid_reg <= decerr_m_axi_bid_next;
+    end
+
+    // write response arbitration
+    wire [M_COUNT_P1-1:0] b_req;
+    wire [M_COUNT_P1-1:0] b_ack;
+    wire [M_COUNT_P1-1:0] b_grant;
+    wire b_grant_valid;
+    wire [CL_M_COUNT_P1-1:0] b_grant_index;
+
+    taxi_arbiter #(
+        .PORTS(M_COUNT_P1),
+        .ARB_ROUND_ROBIN(1),
+        .ARB_BLOCK(1),
+        .ARB_BLOCK_ACK(1),
+        .LSB_HIGH_PRIO(1)
+    )
+    b_arb_inst (
+        .clk(clk),
+        .rst(rst),
+        .req(b_req),
+        .ack(b_ack),
+        .grant(b_grant),
+        .grant_valid(b_grant_valid),
+        .grant_index(b_grant_index)
+    );
+
+    // write response mux
+    always_comb begin
+        if (b_grant_index == CL_M_COUNT_P1'(M_COUNT_P1-1)) begin
+            int_axi.bid = decerr_m_axi_bid_reg;
+            int_axi.bresp = 2'b11; // DECERR
+            int_axi.buser = '0;
+            int_axi.bvalid = decerr_m_axi_bvalid_reg & b_grant_valid;
+        end else begin
+            int_axi.bid = S_ID_W'(int_m_axi_bid[b_grant_index[CL_M_COUNT_INT-1:0]]);
+            int_axi.bresp = int_m_axi_bresp[b_grant_index[CL_M_COUNT_INT-1:0]];
+            int_axi.buser = int_m_axi_buser[b_grant_index[CL_M_COUNT_INT-1:0]];
+            int_axi.bvalid = int_axi_bvalid[b_grant_index[CL_M_COUNT_INT-1:0]][m] & b_grant_valid;
+        end
+    end
+
+    always_comb begin
+        int_axi_bready[m] = '0;
+        // return bready only to a real master port; the extra arbiter slot
+        // (index M_COUNT) is the internal DECERR responder, and its truncated
+        // index would otherwise alias onto master 0 for power-of-two M_COUNT
+        if (b_grant_index != CL_M_COUNT_P1'(M_COUNT_P1-1)) begin
+            int_axi_bready[m][b_grant_index[CL_M_COUNT_INT-1:0]] = b_grant_valid && int_axi.bready;
+        end
+    end
+
+    assign decerr_m_axi_bready = (b_grant_valid && int_axi.bready) && (b_grant_index == CL_M_COUNT_P1'(M_COUNT_P1-1));
+
+    for (genvar n = 0; n < M_COUNT; n = n + 1) begin
+        assign b_req[n] = int_axi_bvalid[n][m] && !b_grant[n];
+        assign b_ack[n] = b_grant[n] && int_axi_bvalid[n][m] && int_axi.bready;
+    end
+
+    assign b_req[M_COUNT_P1-1] = decerr_m_axi_bvalid_reg && !b_grant[M_COUNT_P1-1];
+    assign b_ack[M_COUNT_P1-1] = b_grant[M_COUNT_P1-1] && decerr_m_axi_bvalid_reg && int_axi.bready;
+
+    assign s_cpl_id = int_axi.bid;
+    assign s_cpl_valid = int_axi.bvalid && int_axi.bready;
+
+end // s_ifaces
+
+for (genvar n = 0; n < M_COUNT; n = n + 1) begin : m_ifaces
+
+    taxi_axi_if #(
+        .DATA_W(m_axi_wr[0].DATA_W),
+        .ADDR_W(m_axi_wr[0].ADDR_W),
+        .STRB_W(m_axi_wr[0].STRB_W),
+        .ID_W(m_axi_wr[0].ID_W),
+        .AWUSER_EN(m_axi_wr[0].AWUSER_EN),
+        .AWUSER_W(m_axi_wr[0].AWUSER_W),
+        .WUSER_EN(m_axi_wr[0].WUSER_EN),
+        .WUSER_W(m_axi_wr[0].WUSER_W),
+        .BUSER_EN(m_axi_wr[0].BUSER_EN),
+        .BUSER_W(m_axi_wr[0].BUSER_W)
+    ) int_axi();
+
+    // in-flight transaction count
+    wire trans_start;
+    wire trans_complete;
+    localparam TR_CNT_W = $clog2(M_ISSUE_INT[n]+1);
+    logic [TR_CNT_W-1:0] trans_count_reg = '0;
+
+    wire trans_limit = trans_count_reg >= TR_CNT_W'(M_ISSUE_INT[n]) && !trans_complete;
+
+    always_ff @(posedge clk) begin
+        if (trans_start && !trans_complete) begin
+            trans_count_reg <= trans_count_reg + 1;
+        end else if (!trans_start && trans_complete) begin
+            trans_count_reg <= trans_count_reg - 1;
+        end
+
+        if (rst) begin
+            trans_count_reg <= 0;
+        end
+    end
+
+    // address arbitration
+    logic [CL_S_COUNT_INT-1:0] w_select_reg = '0, w_select_next;
+    logic w_select_valid_reg = 1'b0, w_select_valid_next;
+    logic w_select_new_reg = 1'b0, w_select_new_next;
+
+    wire [S_COUNT-1:0] a_req;
+    wire [S_COUNT-1:0] a_ack;
+    wire [S_COUNT-1:0] a_grant;
+    wire a_grant_valid;
+    wire [CL_S_COUNT_INT-1:0] a_grant_index;
+
+    if (S_COUNT > 1) begin : arb
+
+        taxi_arbiter #(
+            .PORTS(S_COUNT),
+            .ARB_ROUND_ROBIN(1),
+            .ARB_BLOCK(1),
+            .ARB_BLOCK_ACK(1),
+            .LSB_HIGH_PRIO(1)
+        )
+        a_arb_inst (
+            .clk(clk),
+            .rst(rst),
+            .req(a_req),
+            .ack(a_ack),
+            .grant(a_grant),
+            .grant_valid(a_grant_valid),
+            .grant_index(a_grant_index)
+        );
+
+    end else begin
+
+        logic grant_valid_reg = 1'b0;
+
+        always_ff @(posedge clk) begin
+            if (a_req) begin
+                grant_valid_reg <= 1'b1;
+            end
+
+            if (a_ack || rst) begin
+                grant_valid_reg <= 1'b0;
+            end
+        end
+
+        assign a_grant_valid = grant_valid_reg;
+        assign a_grant = grant_valid_reg;
+        assign a_grant_index = '0;
+
+    end
+
+    // address mux
+    if (S_COUNT > 1) begin
+        assign int_axi.awid = {a_grant_index, int_s_axi_awid[a_grant_index]};
+    end else begin
+        assign int_axi.awid = int_s_axi_awid[a_grant_index];
+    end
+    assign int_axi.awaddr = int_s_axi_awaddr[a_grant_index];
+    assign int_axi.awlen = int_s_axi_awlen[a_grant_index];
+    assign int_axi.awsize = int_s_axi_awsize[a_grant_index];
+    assign int_axi.awburst = int_s_axi_awburst[a_grant_index];
+    assign int_axi.awlock = int_s_axi_awlock[a_grant_index];
+    assign int_axi.awcache = int_s_axi_awcache[a_grant_index];
+    assign int_axi.awprot = int_s_axi_awprot[a_grant_index];
+    assign int_axi.awqos = int_s_axi_awqos[a_grant_index];
+    assign int_axi.awregion = int_s_axi_awregion[a_grant_index];
+    assign int_axi.awuser = int_s_axi_awuser[a_grant_index];
+    assign int_axi.awvalid = int_axi_awvalid[a_grant_index][n] && a_grant_valid;
+
+    always_comb begin
+        int_axi_awready[n] = '0;
+        int_axi_awready[n][a_grant_index] = a_grant_valid && int_axi.awready;
+    end
+
+    for (genvar m = 0; m < S_COUNT; m = m + 1) begin
+        assign a_req[m] = int_axi_awvalid[m][n] && !a_grant_valid && !trans_limit && !w_select_valid_next;
+        assign a_ack[m] = a_grant[m] && int_axi_awvalid[m][n] && int_axi.awready;
+    end
+
+    assign trans_start = int_axi.awvalid && int_axi.awready && a_grant_valid;
+
+    // write data mux
+    assign int_axi.wdata = int_s_axi_wdata[w_select_reg];
+    assign int_axi.wstrb = int_s_axi_wstrb[w_select_reg];
+    assign int_axi.wlast = int_s_axi_wlast[w_select_reg];
+    assign int_axi.wuser = int_s_axi_wuser[w_select_reg];
+    assign int_axi.wvalid = int_axi_wvalid[w_select_reg][n] && w_select_valid_reg;
+
+    always_comb begin
+        int_axi_wready[n] = '0;
+        int_axi_wready[n][w_select_reg] = w_select_valid_reg && int_axi.wready;
+    end
+
+    // write data routing
+    always_comb begin
+        w_select_next = w_select_reg;
+        w_select_valid_next = w_select_valid_reg && !(int_axi.wvalid && int_axi.wready && int_axi.wlast);
+        w_select_new_next = w_select_new_reg || a_grant_valid == 0 || a_ack != 0;
+
+        if (a_grant_valid && !w_select_valid_reg && w_select_new_reg) begin
+            w_select_next = a_grant_index;
+            w_select_valid_next = a_grant_valid;
+            w_select_new_next = 1'b0;
+        end
+    end
+
+    always_ff @(posedge clk) begin
+        w_select_reg <= w_select_next;
+        w_select_valid_reg <= w_select_valid_next;
+        w_select_new_reg <= w_select_new_next;
+
+        if (rst) begin
+            w_select_valid_reg <= 1'b0;
+            w_select_new_reg <= 1'b1;
+        end
+    end
+
+    // write response forwarding
+    // upper ID bits hold the index of the originating slave interface (see address mux)
+    wire [CL_S_COUNT_INT-1:0] b_select = CL_S_COUNT_INT'(int_axi.bid >> S_ID_W);
+
+    assign int_m_axi_bid[n] = int_axi.bid;
+    assign int_m_axi_bresp[n] = int_axi.bresp;
+    assign int_m_axi_buser[n] = int_axi.buser;
+
+    always_comb begin
+        int_axi_bvalid[n] = '0;
+        int_axi_bvalid[n][b_select] = int_axi.bvalid;
+    end
+    assign int_axi.bready = int_axi_bready[b_select][n];
+
+    assign trans_complete = int_axi.bvalid && int_axi.bready;
+
+    // M side register
+    taxi_axi_register_wr #(
+        .AW_REG_TYPE(M_AW_REG_TYPE[n*2 +: 2]),
+        .W_REG_TYPE(M_W_REG_TYPE[n*2 +: 2]),
+        .B_REG_TYPE(M_B_REG_TYPE[n*2 +: 2])
+    )
+    reg_inst (
+        .clk(clk),
+        .rst(rst),
+
+        /*
+         * AXI4 slave interface
+         */
+        .s_axi_wr(int_axi),
+
+        /*
+         * AXI4 master interface
+         */
+        .m_axi_wr(m_axi_wr[n])
+    );
+
+end // m_ifaces
+
+endmodule
+
+`resetall
diff --git a/src/axi/tb/taxi_axi_crossbar/Makefile b/src/axi/tb/taxi_axi_crossbar/Makefile
new file mode 100644
index 0000000..30590c0
--- /dev/null
+++ b/src/axi/tb/taxi_axi_crossbar/Makefile
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: CERN-OHL-S-2.0
+#
+# Copyright (c) 2020-2025
+#
+# Authors:
+# - Alex Forencich
+
+TOPLEVEL_LANG = verilog
+
+SIM ?= verilator
+WAVES ?= 0
+
+COCOTB_HDL_TIMEUNIT = 1ns
+COCOTB_HDL_TIMEPRECISION = 1ps
+
+RTL_DIR = ../../rtl
+LIB_DIR = ../../lib
+TAXI_SRC_DIR = $(LIB_DIR)/taxi/src
+
+DUT      = taxi_axi_crossbar
+COCOTB_TEST_MODULES = test_$(DUT)
+COCOTB_TOPLEVEL     = test_$(DUT)
+MODULE   = $(COCOTB_TEST_MODULES)
+TOPLEVEL = $(COCOTB_TOPLEVEL)
+VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
+VERILOG_SOURCES += $(RTL_DIR)/$(DUT).f
+
+# handle file list files
+process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
+process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
+uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
+VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
+
+# module parameters
+export PARAM_S_COUNT := 4
+export PARAM_M_COUNT := 4
+export PARAM_DATA_W := 32
+export PARAM_ADDR_W := 32
+export PARAM_STRB_W := $(shell expr $(PARAM_DATA_W) / 8 )
+export PARAM_S_ID_W := 8
+# the "+ 2" is clog2(PARAM_S_COUNT); update it when PARAM_S_COUNT changes (RTL requires M_ID_W >= S_ID_W + clog2(S_COUNT))
+export PARAM_M_ID_W := $(shell expr $(PARAM_S_ID_W) + 2 )
+export PARAM_AWUSER_EN := 0
+export PARAM_AWUSER_W := 1
+export PARAM_WUSER_EN := 0
+export PARAM_WUSER_W := 1
+export PARAM_BUSER_EN := 0
+export PARAM_BUSER_W := 1
+export PARAM_ARUSER_EN := 0
+export PARAM_ARUSER_W := 1
+export PARAM_RUSER_EN := 0
+export PARAM_RUSER_W := 1
+export PARAM_M_REGIONS := 1
+
+ifeq ($(SIM), icarus)
+	PLUSARGS += -fst
+
+	COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst
PARAM_,,$(v))=$($(v)))
+else ifeq ($(SIM), verilator)
+	COMPILE_ARGS += -Wno-WIDTH
+
+	COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
+
+	ifeq ($(WAVES), 1)
+		COMPILE_ARGS += --trace-fst
+		VERILATOR_TRACE = 1
+	endif
+endif
+
+include $(shell cocotb-config --makefiles)/Makefile.sim
diff --git a/src/axi/tb/taxi_axi_crossbar/test_taxi_axi_crossbar.py b/src/axi/tb/taxi_axi_crossbar/test_taxi_axi_crossbar.py
new file mode 100644
index 0000000..1981e3f
--- /dev/null
+++ b/src/axi/tb/taxi_axi_crossbar/test_taxi_axi_crossbar.py
@@ -0,0 +1,288 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: CERN-OHL-S-2.0
+"""
+
+Copyright (c) 2020-2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+"""
+
+import itertools
+import logging
+import os
+import random
+
+import cocotb_test.simulator
+import pytest
+
+import cocotb
+from cocotb.clock import Clock
+from cocotb.triggers import RisingEdge, Timer
+from cocotb.regression import TestFactory
+
+from cocotbext.axi import AxiBus, AxiMaster, AxiRam
+
+
+class TB(object):
+    """Testbench harness: one AxiMaster per DUT s_axi port and one AxiRam per DUT m_axi port."""
+    def __init__(self, dut):
+        self.dut = dut
+
+        self.log = logging.getLogger("cocotb.tb")
+        self.log.setLevel(logging.DEBUG)
+
+        cocotb.start_soon(Clock(dut.clk, 10, units="ns").start())
+
+        self.axi_master = [AxiMaster(AxiBus.from_entity(ch), dut.clk, dut.rst) for ch in dut.s_axi]
+        self.axi_ram = [AxiRam(AxiBus.from_entity(ch), dut.clk, dut.rst, size=2**16) for ch in dut.m_axi]
+
+        for ram in self.axi_ram:
+            # prevent X propagation from screwing things up - "anything but X!"
+            # (X on bid and rid can propagate X to ready/valid)
+            ram.write_if.b_channel.bus.bid.setimmediatevalue(0)
+            ram.read_if.r_channel.bus.rid.setimmediatevalue(0)
+
+    def set_idle_generator(self, generator=None):
+        if generator:
+            for master in self.axi_master:
+                master.write_if.aw_channel.set_pause_generator(generator())
+                master.write_if.w_channel.set_pause_generator(generator())
+                master.read_if.ar_channel.set_pause_generator(generator())
+            for ram in self.axi_ram:
+                ram.write_if.b_channel.set_pause_generator(generator())
+                ram.read_if.r_channel.set_pause_generator(generator())
+
+    def set_backpressure_generator(self, generator=None):
+        if generator:
+            for master in self.axi_master:
+                master.write_if.b_channel.set_pause_generator(generator())
+                master.read_if.r_channel.set_pause_generator(generator())
+            for ram in self.axi_ram:
+                ram.write_if.aw_channel.set_pause_generator(generator())
+                ram.write_if.w_channel.set_pause_generator(generator())
+                ram.read_if.ar_channel.set_pause_generator(generator())
+
+    async def cycle_reset(self):
+        self.dut.rst.setimmediatevalue(0)
+        await RisingEdge(self.dut.clk)
+        await RisingEdge(self.dut.clk)
+        self.dut.rst.value = 1
+        await RisingEdge(self.dut.clk)
+        await RisingEdge(self.dut.clk)
+        self.dut.rst.value = 0
+        await RisingEdge(self.dut.clk)
+        await RisingEdge(self.dut.clk)
+
+
+async def run_test_write(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None, s=0, m=0):
+    """Write bursts of several lengths/offsets from master s into RAM m and check the RAM contents and guard bytes."""
+
+    tb = TB(dut)
+
+    byte_lanes = tb.axi_master[s].write_if.byte_lanes
+    max_burst_size = tb.axi_master[s].write_if.max_burst_size
+
+    if size is None:
+        size = max_burst_size
+
+    await tb.cycle_reset()
+
+    tb.set_idle_generator(idle_inserter)
+    tb.set_backpressure_generator(backpressure_inserter)
+
+    for length in list(range(1, byte_lanes*2))+[1024]:
+        for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)):
+            tb.log.info("length %d, offset %d, size %d", length, offset, size)
+            ram_addr = offset+0x1000
+            addr = ram_addr + m*0x1000000
+            test_data = bytearray([x % 256 for x in range(length)])
+
+            tb.axi_ram[m].write(ram_addr-128, b'\xaa'*(length+256))
+
+            await tb.axi_master[s].write(addr, test_data, size=size)
+
+            tb.log.debug("%s", tb.axi_ram[m].hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48))
+
+            assert tb.axi_ram[m].read(ram_addr, length) == test_data
+            assert tb.axi_ram[m].read(ram_addr-1, 1) == b'\xaa'
+            assert tb.axi_ram[m].read(ram_addr+length, 1) == b'\xaa'
+
+    await RisingEdge(dut.clk)
+    await RisingEdge(dut.clk)
+
+
+async def run_test_read(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None, s=0, m=0):
+    """Preload RAM m, read it back through master s at several lengths/offsets and compare the data."""
+
+    tb = TB(dut)
+
+    byte_lanes = tb.axi_master[s].read_if.byte_lanes
+    max_burst_size = tb.axi_master[s].read_if.max_burst_size
+
+    if size is None:
+        size = max_burst_size
+
+    await tb.cycle_reset()
+
+    tb.set_idle_generator(idle_inserter)
+    tb.set_backpressure_generator(backpressure_inserter)
+
+    for length in list(range(1, byte_lanes*2))+[1024]:
+        for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)):
+            tb.log.info("length %d, offset %d, size %d", length, offset, size)
+            ram_addr = offset+0x1000
+            addr = ram_addr + m*0x1000000
+            test_data = bytearray([x % 256 for x in range(length)])
+
+            tb.axi_ram[m].write(ram_addr, test_data)
+
+            data = await tb.axi_master[s].read(addr, length, size=size)
+
+            assert data.data == test_data
+
+    await RisingEdge(dut.clk)
+    await RisingEdge(dut.clk)
+
+
+async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None):
+    """Run 16 concurrent workers doing random write-then-read-back transfers across all masters and RAMs."""
+
+    tb = TB(dut)
+
+    await tb.cycle_reset()
+
+    tb.set_idle_generator(idle_inserter)
+    tb.set_backpressure_generator(backpressure_inserter)
+
+    async def worker(master, offset, aperture, count=16):
+        for k in range(count):
+            m = random.randrange(len(tb.axi_ram))
+            length = random.randint(1, min(512, aperture))
+            addr = offset+random.randint(0, aperture-length) + m*0x1000000
+            test_data = bytearray([x % 256 for x in range(length)])
+
+            await Timer(random.randint(1, 100), 'ns')
+
+            await master.write(addr, test_data)
+
+            await Timer(random.randint(1, 100), 'ns')
+
+            data = await master.read(addr, length)
+            assert data.data == test_data
+
+    workers = []
+
+    for k in range(16):
+        workers.append(cocotb.start_soon(worker(tb.axi_master[k % len(tb.axi_master)], k*0x1000, 0x1000, count=16)))
+
+    while workers:
+        await workers.pop(0).join()
+
+    await RisingEdge(dut.clk)
+    await RisingEdge(dut.clk)
+
+
+def cycle_pause():
+    """Return an endless pause pattern: three paused cycles followed by one active cycle."""
+    return itertools.cycle([1, 1, 1, 0])
+
+
+if getattr(cocotb, 'top', None) is not None:
+
+    s_count = len(cocotb.top.s_axi)
+    m_count = len(cocotb.top.m_axi)
+
+    data_w = len(cocotb.top.s_axi[0].wdata)
+    byte_lanes = data_w // 8
+    max_burst_size = (byte_lanes-1).bit_length()
+
+    for test in [run_test_write, run_test_read]:
+
+        factory = TestFactory(test)
+        factory.add_option("idle_inserter", [None, cycle_pause])
+        factory.add_option("backpressure_inserter", [None, cycle_pause])
+        # factory.add_option("size", [None]+list(range(max_burst_size)))
+        factory.add_option("s", range(min(s_count, 2)))
+        factory.add_option("m", range(min(m_count, 2)))
+        factory.generate_tests()
+
+    factory = TestFactory(run_stress_test)
+    factory.generate_tests()
+
+
+# cocotb-test
+
+tests_dir = os.path.abspath(os.path.dirname(__file__))
+rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
+lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
+taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
+
+
+def process_f_files(files):
+    """Expand .f file lists recursively (paths relative to the list file); keep one path per basename, last one wins."""
+    lst = {}
+    for f in files:
+        if f[-2:].lower() == '.f':
+            with open(f, 'r') as fp:
+                entries = fp.read().split()
+            for sub in process_f_files([os.path.join(os.path.dirname(f), x) for x in entries]):
+                lst[os.path.basename(sub)] = sub
+        else:
+            lst[os.path.basename(f)] = f
+    return list(lst.values())
+
+
+@pytest.mark.parametrize("data_w", [8, 16, 32])
+@pytest.mark.parametrize("m_count", [1, 4])
+@pytest.mark.parametrize("s_count", [1, 4])
+def test_taxi_axi_crossbar(request, s_count, m_count, data_w):
+    """Build and run the cocotb testbench under Verilator for one (s_count, m_count, data_w) combination."""
+    dut = "taxi_axi_crossbar"
+    module = os.path.splitext(os.path.basename(__file__))[0]
+    toplevel = module
+
+    verilog_sources = [
+        os.path.join(tests_dir, f"{toplevel}.sv"),
+        os.path.join(rtl_dir, f"{dut}.f"),
+    ]
+
+    verilog_sources = process_f_files(verilog_sources)
+
+    parameters = {}
+
+    parameters['S_COUNT'] = s_count
+    parameters['M_COUNT'] = m_count
+    parameters['DATA_W'] = data_w
+    parameters['ADDR_W'] = 32
+    parameters['STRB_W'] = parameters['DATA_W'] // 8
+    parameters['S_ID_W'] = 8
+    parameters['M_ID_W'] = parameters['S_ID_W'] + (s_count-1).bit_length()
+    parameters['AWUSER_EN'] = 0
+    parameters['AWUSER_W'] = 1
+    parameters['WUSER_EN'] = 0
+    parameters['WUSER_W'] = 1
+    parameters['BUSER_EN'] = 0
+    parameters['BUSER_W'] = 1
+    parameters['ARUSER_EN'] = 0
+    parameters['ARUSER_W'] = 1
+    parameters['RUSER_EN'] = 0
+    parameters['RUSER_W'] = 1
+    parameters['M_REGIONS'] = 1
+
+    extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
+
+    sim_build = os.path.join(tests_dir, "sim_build",
+        request.node.name.replace('[', '-').replace(']', ''))
+
+    cocotb_test.simulator.run(
+        simulator="verilator",
+        python_search=[tests_dir],
+        verilog_sources=verilog_sources,
+        toplevel=toplevel,
+        module=module,
+        parameters=parameters,
+        sim_build=sim_build,
+        extra_env=extra_env,
+    )
diff --git a/src/axi/tb/taxi_axi_crossbar/test_taxi_axi_crossbar.sv b/src/axi/tb/taxi_axi_crossbar/test_taxi_axi_crossbar.sv
new file mode 100644
index 0000000..e21b94e
--- /dev/null
+++ b/src/axi/tb/taxi_axi_crossbar/test_taxi_axi_crossbar.sv
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI4 crossbar testbench
+ */
+module test_taxi_axi_crossbar #
+(
+    /* verilator lint_off WIDTHTRUNC */
+    parameter S_COUNT = 4,
+    parameter M_COUNT = 4,
+    parameter DATA_W = 32,
+    parameter ADDR_W = 32,
+    parameter STRB_W = (DATA_W/8),
+    parameter S_ID_W = 8,
+    parameter M_ID_W = S_ID_W+$clog2(S_COUNT),
+    parameter logic AWUSER_EN = 1'b0,
+    parameter AWUSER_W = 1,
+    parameter logic WUSER_EN = 1'b0,
+    parameter WUSER_W = 1,
+    parameter logic BUSER_EN = 1'b0,
+    parameter BUSER_W = 1,
+    parameter logic ARUSER_EN = 1'b0,
+    parameter ARUSER_W = 1,
+    parameter logic RUSER_EN = 1'b0,
+    parameter RUSER_W = 1,
+    parameter S_THREADS = {S_COUNT{32'd2}},
+    parameter S_ACCEPT = {S_COUNT{32'd16}},
+    parameter M_REGIONS = 1,
+    parameter M_BASE_ADDR = '0,
+    parameter M_ADDR_W = {M_COUNT{{M_REGIONS{32'd24}}}},
+    parameter M_CONNECT_RD = {M_COUNT{{S_COUNT{1'b1}}}},
+    parameter M_CONNECT_WR = {M_COUNT{{S_COUNT{1'b1}}}},
+    parameter M_ISSUE = {M_COUNT{32'd4}},
+    parameter M_SECURE = {M_COUNT{1'b0}},
+    parameter S_AW_REG_TYPE = {S_COUNT{2'd0}},
+    parameter S_W_REG_TYPE = {S_COUNT{2'd0}},
+    parameter S_B_REG_TYPE = {S_COUNT{2'd1}},
+    parameter S_AR_REG_TYPE = {S_COUNT{2'd0}},
+    parameter S_R_REG_TYPE = {S_COUNT{2'd2}},
+    parameter M_AW_REG_TYPE = {M_COUNT{2'd1}},
+    parameter M_W_REG_TYPE = {M_COUNT{2'd2}},
+    parameter M_B_REG_TYPE = {M_COUNT{2'd0}},
+    parameter M_AR_REG_TYPE = {M_COUNT{2'd1}},
+    parameter M_R_REG_TYPE = {M_COUNT{2'd0}}
+    /* verilator lint_on WIDTHTRUNC */
+)
+();
+
+logic clk;
+logic rst;
+
+taxi_axi_if #(
+    .DATA_W(DATA_W),
+    .ADDR_W(ADDR_W),
+    .STRB_W(STRB_W),
+    .ID_W(S_ID_W),
+    .AWUSER_EN(AWUSER_EN),
+    .AWUSER_W(AWUSER_W),
+    .WUSER_EN(WUSER_EN),
+    .WUSER_W(WUSER_W),
+    .BUSER_EN(BUSER_EN),
+    .BUSER_W(BUSER_W),
+    .ARUSER_EN(ARUSER_EN),
+    .ARUSER_W(ARUSER_W),
+    .RUSER_EN(RUSER_EN),
+    .RUSER_W(RUSER_W)
+) s_axi[S_COUNT]();
+
+taxi_axi_if #(
+    .DATA_W(DATA_W),
+    .ADDR_W(ADDR_W),
+    .STRB_W(STRB_W),
+    .ID_W(M_ID_W),
+    .AWUSER_EN(AWUSER_EN),
+    .AWUSER_W(AWUSER_W),
+    .WUSER_EN(WUSER_EN),
+    .WUSER_W(WUSER_W),
+    .BUSER_EN(BUSER_EN),
+    .BUSER_W(BUSER_W),
+    .ARUSER_EN(ARUSER_EN),
+    .ARUSER_W(ARUSER_W),
+    .RUSER_EN(RUSER_EN),
+    .RUSER_W(RUSER_W)
+) m_axi[M_COUNT]();
+
+taxi_axi_crossbar #(
+    .S_COUNT(S_COUNT),
+    .M_COUNT(M_COUNT),
+    .ADDR_W(ADDR_W),
+    .S_THREADS(S_THREADS),
+    .S_ACCEPT(S_ACCEPT),
+    .M_REGIONS(M_REGIONS),
+    .M_BASE_ADDR(M_BASE_ADDR),
+    .M_ADDR_W(M_ADDR_W),
+    .M_CONNECT_RD(M_CONNECT_RD),
+    .M_CONNECT_WR(M_CONNECT_WR),
+    .M_ISSUE(M_ISSUE),
+    .M_SECURE(M_SECURE),
+    .S_AW_REG_TYPE(S_AW_REG_TYPE),
+    .S_W_REG_TYPE(S_W_REG_TYPE),
+    .S_B_REG_TYPE(S_B_REG_TYPE),
+    .S_AR_REG_TYPE(S_AR_REG_TYPE),
+    .S_R_REG_TYPE(S_R_REG_TYPE),
+    .M_AW_REG_TYPE(M_AW_REG_TYPE),
+    .M_W_REG_TYPE(M_W_REG_TYPE),
+    .M_B_REG_TYPE(M_B_REG_TYPE),
+    .M_AR_REG_TYPE(M_AR_REG_TYPE),
+    .M_R_REG_TYPE(M_R_REG_TYPE)
+)
+uut (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * AXI4 slave interface
+     */
+    .s_axi_wr(s_axi),
+    .s_axi_rd(s_axi),
+
+    /*
+     * AXI4 master interface
+     */
+    .m_axi_wr(m_axi),
+    .m_axi_rd(m_axi)
+);
+
+endmodule
+
+`resetall