From 2174a035900b97a586bf0b61d0053256f6dba506 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 12 May 2026 00:11:29 -0700 Subject: [PATCH] eth: Optimize 10G/25G MAC TX CRC computation Signed-off-by: Alex Forencich --- src/eth/rtl/taxi_axis_baser_tx_32.sv | 110 ++++++++++------------- src/eth/rtl/taxi_axis_baser_tx_64.sv | 126 ++++++++++++--------------- src/eth/rtl/taxi_axis_xgmii_tx_32.sv | 109 ++++++++++------------- src/eth/rtl/taxi_axis_xgmii_tx_64.sv | 123 ++++++++++++-------------- 4 files changed, 210 insertions(+), 258 deletions(-) diff --git a/src/eth/rtl/taxi_axis_baser_tx_32.sv b/src/eth/rtl/taxi_axis_baser_tx_32.sv index 54b5dbd..9970716 100644 --- a/src/eth/rtl/taxi_axis_baser_tx_32.sv +++ b/src/eth/rtl/taxi_axis_baser_tx_32.sv @@ -1,7 +1,7 @@ // SPDX-License-Identifier: CERN-OHL-S-2.0 /* -Copyright (c) 2025 FPGA Ninja, LLC +Copyright (c) 2025-2026 FPGA Ninja, LLC Authors: - Alex Forencich @@ -168,10 +168,6 @@ typedef enum logic [3:0] { state_t state_reg = STATE_IDLE, state_next; -// datapath control signals -logic reset_crc; -logic update_crc; - logic [DATA_W-1:0] s_tdata_reg = '0, s_tdata_next; logic [EMPTY_W-1:0] s_empty_reg = '0, s_empty_next; @@ -206,9 +202,6 @@ logic [PTP_TS_W-1:0] m_axis_tx_cpl_ts_reg = '0, m_axis_tx_cpl_ts_next; logic [TX_TAG_W-1:0] m_axis_tx_cpl_tag_reg = '0, m_axis_tx_cpl_tag_next; logic m_axis_tx_cpl_valid_reg = 1'b0, m_axis_tx_cpl_valid_next; -logic [31:0] crc_state_reg[4]; -wire [31:0] crc_state[4]; - logic [DATA_W-1:0] encoded_tx_data_reg = {24'd0, BLOCK_TYPE_CTRL}; logic encoded_tx_data_valid_reg = 1'b0; logic [HDR_W-1:0] encoded_tx_hdr_reg = SYNC_CTRL; @@ -265,26 +258,28 @@ assign stat_tx_err_oversize = stat_tx_err_oversize_reg; assign stat_tx_err_user = stat_tx_err_user_reg; assign stat_tx_err_underflow = stat_tx_err_underflow_reg; -for (genvar n = 0; n < 4; n = n + 1) begin : crc +logic [DATA_W+24-1:0] crc_data_reg = '0, crc_data_next; +logic [31:0] crc_state_reg = '0; +wire [31:0] crc_state; - taxi_lfsr #( - 
.LFSR_W(32), - .LFSR_POLY(32'h4c11db7), - .LFSR_GALOIS(1), - .LFSR_FEED_FORWARD(0), - .REVERSE(1), - .DATA_W(8*(n+1)), - .DATA_IN_EN(1'b1), - .DATA_OUT_EN(1'b0) - ) - eth_crc ( - .data_in(s_tdata_reg[0 +: 8*(n+1)]), - .state_in(crc_state_reg[3]), - .data_out(), - .state_out(crc_state[n]) - ); - -end +taxi_lfsr #( + .LFSR_W(32), + .LFSR_POLY(32'h4c11db7), + .LFSR_GALOIS(1), + .LFSR_FEED_FORWARD(0), + .REVERSE(1), + .DATA_W(DATA_W+24), + .DATA_IN_EN(1'b1), + .DATA_OUT_EN(1'b0), + .STATE_SHIFT_PRE(0), + .STATE_SHIFT_POST(-24) +) +eth_crc ( + .data_in(crc_data_reg), + .state_in('0), + .data_out(), + .state_out(crc_state) +); function [1:0] keep2empty(input [3:0] k); casez (k) @@ -296,35 +291,28 @@ function [1:0] keep2empty(input [3:0] k); endcase endfunction -// Mask input data -wire [DATA_W-1:0] s_axis_tx_tdata_masked; - -for (genvar n = 0; n < KEEP_W; n = n + 1) begin - assign s_axis_tx_tdata_masked[n*8 +: 8] = (n == 0 || s_axis_tx.tkeep[n]) ? s_axis_tx.tdata[n*8 +: 8] : 8'd0; -end - // FCS cycle calculation always_comb begin casez (s_empty_reg) 2'd3: begin - fcs_output_data_0 = {~crc_state[0][23:0], s_tdata_reg[7:0]}; - fcs_output_data_1 = {24'd0, ~crc_state_reg[0][31:24]}; + fcs_output_data_0 = {~crc_state[23:0], s_tdata_reg[7:0]}; + fcs_output_data_1 = {24'd0, ~crc_state_reg[31:24]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_1; ifg_offset = 8'd3; extra_cycle = 1'b0; end 2'd2: begin - fcs_output_data_0 = {~crc_state[1][15:0], s_tdata_reg[15:0]}; - fcs_output_data_1 = {16'd0, ~crc_state_reg[1][31:16]}; + fcs_output_data_0 = {~crc_state[15:0], s_tdata_reg[15:0]}; + fcs_output_data_1 = {16'd0, ~crc_state_reg[31:16]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_2; ifg_offset = 8'd2; extra_cycle = 1'b0; end 2'd1: begin - fcs_output_data_0 = {~crc_state[2][7:0], s_tdata_reg[23:0]}; - fcs_output_data_1 = {8'd0, ~crc_state_reg[2][31:8]}; + fcs_output_data_0 = {~crc_state[7:0], s_tdata_reg[23:0]}; + 
fcs_output_data_1 = {8'd0, ~crc_state_reg[31:8]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_3; ifg_offset = 8'd1; @@ -332,7 +320,7 @@ always_comb begin end 2'd0: begin fcs_output_data_0 = s_tdata_reg; - fcs_output_data_1 = ~crc_state_reg[3]; + fcs_output_data_1 = ~crc_state_reg; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_DATA; ifg_offset = 8'd4; @@ -344,9 +332,6 @@ end always_comb begin state_next = STATE_IDLE; - reset_crc = 1'b0; - update_crc = 1'b0; - frame_next = frame_reg; frame_error_next = frame_error_reg; frame_oversize_next = frame_oversize_reg; @@ -368,6 +353,8 @@ always_comb begin s_tdata_next = s_tdata_reg; s_empty_next = s_empty_reg; + crc_data_next = crc_data_reg; + m_axis_tx_cpl_ts_next = m_axis_tx_cpl_ts_reg; m_axis_tx_cpl_tag_next = m_axis_tx_cpl_tag_reg; m_axis_tx_cpl_valid_next = 1'b0; @@ -450,6 +437,14 @@ always_comb begin ifg_cnt_next = '0; end + // FCS + casez (s_axis_tx.tkeep) + 4'b1111: crc_data_next = {24'd0, s_axis_tx.tdata} ^ {24'd0, crc_state}; + 4'b0111: crc_data_next = {24'd0, s_axis_tx.tdata[23:0], 8'd0} ^ {16'd0, crc_state, 8'd0}; + 4'bz011: crc_data_next = {24'd0, s_axis_tx.tdata[15:0], 16'd0} ^ {8'd0, crc_state, 16'd0}; + default: crc_data_next = {24'd0, s_axis_tx.tdata[7:0], 24'd0} ^ {crc_state, 24'd0}; + endcase + case (state_reg) STATE_IDLE: begin // idle state - wait for data @@ -459,12 +454,11 @@ always_comb begin frame_len_next = 0; {frame_len_lim_cyc_next, frame_len_lim_last_next} = cfg_tx_max_pkt_len; frame_len_lim_check_next = 1'b0; - reset_crc = 1'b1; output_data_next = s_tdata_reg; output_type_next = OUTPUT_TYPE_IDLE; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); m_axis_tx_cpl_tag_next = s_axis_tx.tid; @@ -483,14 +477,15 @@ always_comb begin end STATE_PREAMBLE: begin // send preamble - reset_crc = 1'b1; hdr_ptr_next = 0; frame_len_next = 0; - s_tdata_next = s_axis_tx_tdata_masked; + 
s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {24'd0, 32'hffffffff}; + output_data_next = {ETH_SFD, {3{ETH_PRE}}}; output_type_next = OUTPUT_TYPE_DATA; @@ -500,13 +495,12 @@ always_comb begin end STATE_PAYLOAD: begin // transfer payload - update_crc = 1'b1; s_axis_tx_tready_next = 1'b1; output_data_next = s_tdata_reg; output_type_next = OUTPUT_TYPE_DATA; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); stat_tx_byte_next = 3'(KEEP_W); @@ -544,8 +538,6 @@ always_comb begin stat_tx_byte_next = 3'(KEEP_W); - update_crc = 1'b1; - ifg_count_next = (cfg_tx_ifg > 8'd12 ? cfg_tx_ifg : 8'd12) - ifg_offset + 8'(deficit_idle_count_reg); if (frame_error_reg) begin state_next = STATE_ERR; @@ -689,6 +681,8 @@ always_ff @(posedge clk) begin s_tdata_reg <= s_tdata_next; s_empty_reg <= s_empty_next; + crc_data_reg <= crc_data_next; + s_axis_tx_tready_reg <= s_axis_tx_tready_next; m_axis_tx_cpl_ts_reg <= m_axis_tx_cpl_ts_next; @@ -817,17 +811,7 @@ always_ff @(posedge clk) begin phase_reg <= 1'b1; end - for (integer i = 0; i < 3; i = i + 1) begin - crc_state_reg[i] <= crc_state[i]; - end - - if (update_crc) begin - crc_state_reg[3] <= crc_state[3]; - end - - if (reset_crc) begin - crc_state_reg[3] <= '1; - end + crc_state_reg <= crc_state; end tx_gbx_sync_reg <= tx_gbx_req_sync; diff --git a/src/eth/rtl/taxi_axis_baser_tx_64.sv b/src/eth/rtl/taxi_axis_baser_tx_64.sv index a134a95..0d15a74 100644 --- a/src/eth/rtl/taxi_axis_baser_tx_64.sv +++ b/src/eth/rtl/taxi_axis_baser_tx_64.sv @@ -1,7 +1,7 @@ // SPDX-License-Identifier: CERN-OHL-S-2.0 /* -Copyright (c) 2019-2025 FPGA Ninja, LLC +Copyright (c) 2019-2026 FPGA Ninja, LLC Authors: - Alex Forencich @@ -172,10 +172,6 @@ typedef enum logic [2:0] { state_t state_reg = STATE_IDLE, state_next; -// datapath control signals -logic reset_crc; -logic update_crc; - logic swap_lanes_reg = 1'b0, 
swap_lanes_next; logic swap_lanes_d1_reg = 1'b0; logic [31:0] swap_data_reg = 32'd0; @@ -218,9 +214,6 @@ logic m_axis_tx_cpl_valid_reg = 1'b0; logic m_axis_tx_cpl_valid_int_reg = 1'b0; logic m_axis_tx_cpl_ts_borrow_reg = 1'b0; -logic [31:0] crc_state_reg[8]; -wire [31:0] crc_state[8]; - logic [DATA_W-1:0] encoded_tx_data_reg = {{8{CTRL_IDLE}}, BLOCK_TYPE_CTRL}; logic encoded_tx_data_valid_reg = 1'b0; logic [HDR_W-1:0] encoded_tx_hdr_reg = SYNC_CTRL; @@ -278,26 +271,28 @@ assign stat_tx_err_oversize = stat_tx_err_oversize_reg; assign stat_tx_err_user = stat_tx_err_user_reg; assign stat_tx_err_underflow = stat_tx_err_underflow_reg; -for (genvar n = 0; n < 8; n = n + 1) begin : crc +logic [DATA_W+24-1:0] crc_data_reg = '0, crc_data_next; /* registered data_in word for the eth_crc instance below; zero-initialized like the 32-bit variants */ +logic [31:0] crc_state_reg = '0; +wire [31:0] crc_state; - taxi_lfsr #( - .LFSR_W(32), - .LFSR_POLY(32'h4c11db7), - .LFSR_GALOIS(1), - .LFSR_FEED_FORWARD(0), - .REVERSE(1), - .DATA_W(8*(n+1)), - .DATA_IN_EN(1'b1), - .DATA_OUT_EN(1'b0) - ) - eth_crc ( - .data_in(s_tdata_reg[0 +: 8*(n+1)]), - .state_in(crc_state_reg[7]), - .data_out(), - .state_out(crc_state[n]) - ); - -end +taxi_lfsr #( + .LFSR_W(32), + .LFSR_POLY(32'h4c11db7), + .LFSR_GALOIS(1), + .LFSR_FEED_FORWARD(0), + .REVERSE(1), + .DATA_W(DATA_W+24), + .DATA_IN_EN(1'b1), + .DATA_OUT_EN(1'b0), + .STATE_SHIFT_PRE(0), + .STATE_SHIFT_POST(-24) +) +eth_crc ( + .data_in(crc_data_reg), + .state_in('0), + .data_out(), + .state_out(crc_state) +); function [2:0] keep2empty(input [7:0] k); casez (k) @@ -313,68 +308,61 @@ function [2:0] keep2empty(input [7:0] k); endcase endfunction -// Mask input data -wire [DATA_W-1:0] s_axis_tx_tdata_masked; - -for (genvar n = 0; n < KEEP_W; n = n + 1) begin - assign s_axis_tx_tdata_masked[n*8 +: 8] = (n == 0 || s_axis_tx.tkeep[n]) ? 
s_axis_tx.tdata[n*8 +: 8] : 8'd0; -end - // FCS cycle calculation always_comb begin casez (s_empty_reg) 3'd7: begin - fcs_output_data_0 = {24'd0, ~crc_state[0][31:0], s_tdata_reg[7:0]}; + fcs_output_data_0 = {24'd0, ~crc_state[31:0], s_tdata_reg[7:0]}; fcs_output_data_1 = 64'd0; fcs_output_type_0 = OUTPUT_TYPE_TERM_5; fcs_output_type_1 = OUTPUT_TYPE_IDLE; ifg_offset = 8'd3; end 3'd6: begin - fcs_output_data_0 = {16'd0, ~crc_state[1][31:0], s_tdata_reg[15:0]}; + fcs_output_data_0 = {16'd0, ~crc_state[31:0], s_tdata_reg[15:0]}; fcs_output_data_1 = 64'd0; fcs_output_type_0 = OUTPUT_TYPE_TERM_6; fcs_output_type_1 = OUTPUT_TYPE_IDLE; ifg_offset = 8'd2; end 3'd5: begin - fcs_output_data_0 = {8'd0, ~crc_state[2][31:0], s_tdata_reg[23:0]}; + fcs_output_data_0 = {8'd0, ~crc_state[31:0], s_tdata_reg[23:0]}; fcs_output_data_1 = 64'd0; fcs_output_type_0 = OUTPUT_TYPE_TERM_7; fcs_output_type_1 = OUTPUT_TYPE_IDLE; ifg_offset = 8'd1; end 3'd4: begin - fcs_output_data_0 = {~crc_state[3][31:0], s_tdata_reg[31:0]}; + fcs_output_data_0 = {~crc_state[31:0], s_tdata_reg[31:0]}; fcs_output_data_1 = 64'd0; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_0; ifg_offset = 8'd8; end 3'd3: begin - fcs_output_data_0 = {~crc_state[4][23:0], s_tdata_reg[39:0]}; - fcs_output_data_1 = {56'd0, ~crc_state_reg[4][31:24]}; + fcs_output_data_0 = {~crc_state[23:0], s_tdata_reg[39:0]}; + fcs_output_data_1 = {56'd0, ~crc_state_reg[31:24]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_1; ifg_offset = 8'd7; end 3'd2: begin - fcs_output_data_0 = {~crc_state[5][15:0], s_tdata_reg[47:0]}; - fcs_output_data_1 = {48'd0, ~crc_state_reg[5][31:16]}; + fcs_output_data_0 = {~crc_state[15:0], s_tdata_reg[47:0]}; + fcs_output_data_1 = {48'd0, ~crc_state_reg[31:16]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_2; ifg_offset = 8'd6; end 3'd1: begin - fcs_output_data_0 = {~crc_state[6][7:0], s_tdata_reg[55:0]}; - fcs_output_data_1 = 
{40'd0, ~crc_state_reg[6][31:8]}; + fcs_output_data_0 = {~crc_state[7:0], s_tdata_reg[55:0]}; + fcs_output_data_1 = {40'd0, ~crc_state_reg[31:8]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_3; ifg_offset = 8'd5; end 3'd0: begin fcs_output_data_0 = s_tdata_reg; - fcs_output_data_1 = {32'd0, ~crc_state_reg[7][31:0]}; + fcs_output_data_1 = {32'd0, ~crc_state_reg[31:0]}; fcs_output_type_0 = OUTPUT_TYPE_DATA; fcs_output_type_1 = OUTPUT_TYPE_TERM_4; ifg_offset = 8'd4; @@ -385,8 +373,6 @@ end always_comb begin state_next = STATE_IDLE; - reset_crc = 1'b0; - update_crc = 1'b0; swap_lanes_next = swap_lanes_reg; @@ -412,6 +398,8 @@ always_comb begin s_tdata_next = s_tdata_reg; s_empty_next = s_empty_reg; + crc_data_next = crc_data_reg; + m_axis_tx_cpl_tag_next = m_axis_tx_cpl_tag_reg; output_data_next = s_tdata_reg; @@ -485,6 +473,18 @@ always_comb begin ifg_cnt_next = '0; end + // FCS: shift the valid tkeep bytes up by one byte per unused lane (low bytes zero-filled) and XOR crc_state in at the same shift; every tkeep bit above the first 0 is a don't-care + casez (s_axis_tx.tkeep) + 8'b11111111: crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, crc_state}; + 8'b01111111: crc_data_next = {24'd0, s_axis_tx.tdata[55:0], 8'd0} ^ {48'd0, crc_state, 8'd0}; + 8'bz0111111: crc_data_next = {24'd0, s_axis_tx.tdata[47:0], 16'd0} ^ {40'd0, crc_state, 16'd0}; + 8'bzz011111: crc_data_next = {24'd0, s_axis_tx.tdata[39:0], 24'd0} ^ {32'd0, crc_state, 24'd0}; + 8'bzzz01111: crc_data_next = {24'd0, s_axis_tx.tdata[31:0], 32'd0} ^ {24'd0, crc_state, 32'd0}; + 8'bzzzz0111: crc_data_next = {24'd0, s_axis_tx.tdata[23:0], 40'd0} ^ {16'd0, crc_state, 40'd0}; + 8'bzzzzz011: crc_data_next = {24'd0, s_axis_tx.tdata[15:0], 48'd0} ^ {8'd0, crc_state, 48'd0}; + default: crc_data_next = {24'd0, s_axis_tx.tdata[7:0], 56'd0} ^ {crc_state, 56'd0}; + endcase + case (state_reg) STATE_IDLE: begin // idle state - wait for data @@ -494,15 +494,16 @@ always_comb begin frame_len_next = 0; {frame_len_lim_cyc_next, frame_len_lim_last_next} = cfg_tx_max_pkt_len ^ 4; frame_len_lim_check_next = 1'b0; - reset_crc = 1'b1; s_axis_tx_tready_next = cfg_tx_enable; 
output_data_next = s_tdata_reg; output_type_next = OUTPUT_TYPE_IDLE; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, 32'hffffffff}; + m_axis_tx_cpl_tag_next = s_axis_tx.tid; if (s_axis_tx.tvalid && s_axis_tx.tready) begin @@ -521,13 +522,12 @@ always_comb begin end STATE_PAYLOAD: begin // transfer payload - update_crc = 1'b1; s_axis_tx_tready_next = 1'b1; output_data_next = s_tdata_reg; output_type_next = OUTPUT_TYPE_DATA; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); stat_tx_byte_next = 4'(KEEP_W); @@ -567,8 +567,6 @@ always_comb begin output_data_next = fcs_output_data_0; output_type_next = fcs_output_type_0; - update_crc = 1'b1; - ifg_count_next = (cfg_tx_ifg > 8'd12 ? cfg_tx_ifg : 8'd12) - ifg_offset + (swap_lanes_reg ? 8'd4 : 8'd0) + 8'(deficit_idle_count_reg); if (s_empty_reg <= 4) begin stat_tx_byte_next = 4'(KEEP_W); @@ -598,8 +596,6 @@ always_comb begin stat_tx_byte_next = 4-s_empty_reg; frame_len_next = frame_len_reg + 16'(4-s_empty_reg); - reset_crc = 1'b1; - stat_tx_pkt_len_next = frame_len_next; stat_tx_pkt_good_next = !frame_error_reg; stat_tx_pkt_bad_next = frame_error_reg; @@ -609,6 +605,8 @@ always_comb begin stat_tx_pkt_vlan_next = is_8021q_reg; stat_tx_err_oversize_next = frame_oversize_reg; + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, 32'hffffffff}; + if (DIC_EN) begin if (ifg_count_next > 8'd7) begin state_next = STATE_IFG; @@ -661,14 +659,14 @@ always_comb begin output_data_next = s_tdata_reg; output_type_next = OUTPUT_TYPE_IDLE; + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, 32'hffffffff}; + if (ifg_count_reg > 8'd8) begin ifg_count_next = ifg_count_reg - 8'd8; end else begin ifg_count_next = 8'd0; end - reset_crc = 1'b1; - if (DIC_EN) begin if (ifg_count_next > 8'd7 || frame_reg) begin state_next = STATE_IFG; @@ -727,6 +725,8 @@ 
always_ff @(posedge clk) begin s_tdata_reg <= s_tdata_next; s_empty_reg <= s_empty_next; + crc_data_reg <= crc_data_next; + s_axis_tx_tready_reg <= s_axis_tx_tready_next; m_axis_tx_cpl_tag_reg <= m_axis_tx_cpl_tag_next; @@ -955,17 +955,7 @@ always_ff @(posedge clk) begin encoded_tx_data_valid_reg <= 1'b1; encoded_tx_hdr_valid_reg <= 1'b1; - for (integer i = 0; i < 7; i = i + 1) begin - crc_state_reg[i] <= crc_state[i]; - end - - if (update_crc) begin - crc_state_reg[7] <= crc_state[7]; - end - - if (reset_crc) begin - crc_state_reg[7] <= '1; - end + crc_state_reg <= crc_state; end tx_gbx_sync_reg <= tx_gbx_req_sync; diff --git a/src/eth/rtl/taxi_axis_xgmii_tx_32.sv b/src/eth/rtl/taxi_axis_xgmii_tx_32.sv index 86b9c8a..5c747b1 100644 --- a/src/eth/rtl/taxi_axis_xgmii_tx_32.sv +++ b/src/eth/rtl/taxi_axis_xgmii_tx_32.sv @@ -1,7 +1,7 @@ // SPDX-License-Identifier: CERN-OHL-S-2.0 /* -Copyright (c) 2015-2025 FPGA Ninja, LLC +Copyright (c) 2015-2026 FPGA Ninja, LLC Authors: - Alex Forencich @@ -129,10 +129,6 @@ typedef enum logic [3:0] { state_t state_reg = STATE_IDLE, state_next; -// datapath control signals -logic reset_crc; -logic update_crc; - logic [DATA_W-1:0] s_tdata_reg = '0, s_tdata_next; logic [EMPTY_W-1:0] s_empty_reg = '0, s_empty_next; @@ -167,9 +163,6 @@ logic [PTP_TS_W-1:0] m_axis_tx_cpl_ts_reg = '0, m_axis_tx_cpl_ts_next; logic [TX_TAG_W-1:0] m_axis_tx_cpl_tag_reg = '0, m_axis_tx_cpl_tag_next; logic m_axis_tx_cpl_valid_reg = 1'b0, m_axis_tx_cpl_valid_next; -logic [31:0] crc_state_reg[4]; -wire [31:0] crc_state[4]; - logic [DATA_W-1:0] xgmii_txd_reg = {CTRL_W{XGMII_IDLE}}, xgmii_txd_next; logic [CTRL_W-1:0] xgmii_txc_reg = {CTRL_W{1'b1}}, xgmii_txc_next; logic xgmii_tx_valid_reg = 1'b0; @@ -219,26 +212,28 @@ assign stat_tx_err_oversize = stat_tx_err_oversize_reg; assign stat_tx_err_user = stat_tx_err_user_reg; assign stat_tx_err_underflow = stat_tx_err_underflow_reg; -for (genvar n = 0; n < 4; n = n + 1) begin : crc +logic [DATA_W+24-1:0] crc_data_reg = '0, 
crc_data_next; +logic [31:0] crc_state_reg = '0; +wire [31:0] crc_state; - taxi_lfsr #( - .LFSR_W(32), - .LFSR_POLY(32'h4c11db7), - .LFSR_GALOIS(1), - .LFSR_FEED_FORWARD(0), - .REVERSE(1), - .DATA_W(8*(n+1)), - .DATA_IN_EN(1'b1), - .DATA_OUT_EN(1'b0) - ) - eth_crc ( - .data_in(s_tdata_reg[0 +: 8*(n+1)]), - .state_in(crc_state_reg[3]), - .data_out(), - .state_out(crc_state[n]) - ); - -end +taxi_lfsr #( + .LFSR_W(32), + .LFSR_POLY(32'h4c11db7), + .LFSR_GALOIS(1), + .LFSR_FEED_FORWARD(0), + .REVERSE(1), + .DATA_W(DATA_W+24), + .DATA_IN_EN(1'b1), + .DATA_OUT_EN(1'b0), + .STATE_SHIFT_PRE(0), + .STATE_SHIFT_POST(-24) +) +eth_crc ( + .data_in(crc_data_reg), + .state_in('0), + .data_out(), + .state_out(crc_state) +); function [1:0] keep2empty(input [3:0] k); casez (k) @@ -250,35 +245,28 @@ function [1:0] keep2empty(input [3:0] k); endcase endfunction -// Mask input data -wire [DATA_W-1:0] s_axis_tx_tdata_masked; - -for (genvar n = 0; n < KEEP_W; n = n + 1) begin - assign s_axis_tx_tdata_masked[n*8 +: 8] = (n == 0 || s_axis_tx.tkeep[n]) ? 
s_axis_tx.tdata[n*8 +: 8] : 8'd0; -end - // FCS cycle calculation always_comb begin casez (s_empty_reg) 2'd3: begin - fcs_output_txd_0 = {~crc_state[0][23:0], s_tdata_reg[7:0]}; - fcs_output_txd_1 = {{2{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[0][31:24]}; + fcs_output_txd_0 = {~crc_state[23:0], s_tdata_reg[7:0]}; + fcs_output_txd_1 = {{2{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[31:24]}; fcs_output_txc_0 = 4'b0000; fcs_output_txc_1 = 4'b1110; ifg_offset = 8'd3; extra_cycle = 1'b0; end 2'd2: begin - fcs_output_txd_0 = {~crc_state[1][15:0], s_tdata_reg[15:0]}; - fcs_output_txd_1 = {XGMII_IDLE, XGMII_TERM, ~crc_state_reg[1][31:16]}; + fcs_output_txd_0 = {~crc_state[15:0], s_tdata_reg[15:0]}; + fcs_output_txd_1 = {XGMII_IDLE, XGMII_TERM, ~crc_state_reg[31:16]}; fcs_output_txc_0 = 4'b0000; fcs_output_txc_1 = 4'b1100; ifg_offset = 8'd2; extra_cycle = 1'b0; end 2'd1: begin - fcs_output_txd_0 = {~crc_state[2][7:0], s_tdata_reg[23:0]}; - fcs_output_txd_1 = {XGMII_TERM, ~crc_state_reg[2][31:8]}; + fcs_output_txd_0 = {~crc_state[7:0], s_tdata_reg[23:0]}; + fcs_output_txd_1 = {XGMII_TERM, ~crc_state_reg[31:8]}; fcs_output_txc_0 = 4'b0000; fcs_output_txc_1 = 4'b1000; ifg_offset = 8'd1; @@ -286,7 +274,7 @@ always_comb begin end 2'd0: begin fcs_output_txd_0 = s_tdata_reg; - fcs_output_txd_1 = ~crc_state_reg[3]; + fcs_output_txd_1 = ~crc_state_reg; fcs_output_txc_0 = 4'b0000; fcs_output_txc_1 = 4'b0000; ifg_offset = 8'd4; @@ -298,8 +286,6 @@ end always_comb begin state_next = STATE_IDLE; - reset_crc = 1'b0; - update_crc = 1'b0; frame_next = frame_reg; frame_error_next = frame_error_reg; @@ -322,6 +308,8 @@ always_comb begin s_tdata_next = s_tdata_reg; s_empty_next = s_empty_reg; + crc_data_next = crc_data_reg; + m_axis_tx_cpl_ts_next = m_axis_tx_cpl_ts_reg; m_axis_tx_cpl_tag_next = m_axis_tx_cpl_tag_reg; m_axis_tx_cpl_valid_next = 1'b0; @@ -406,6 +394,14 @@ always_comb begin ifg_cnt_next = '0; end + // FCS + casez (s_axis_tx.tkeep) + 4'b1111: crc_data_next = {24'd0, 
s_axis_tx.tdata} ^ {24'd0, crc_state}; + 4'b0111: crc_data_next = {24'd0, s_axis_tx.tdata[23:0], 8'd0} ^ {16'd0, crc_state, 8'd0}; + 4'bz011: crc_data_next = {24'd0, s_axis_tx.tdata[15:0], 16'd0} ^ {8'd0, crc_state, 16'd0}; + default: crc_data_next = {24'd0, s_axis_tx.tdata[7:0], 24'd0} ^ {crc_state, 24'd0}; + endcase + case (state_reg) STATE_IDLE: begin // idle state - wait for data @@ -415,13 +411,12 @@ always_comb begin frame_len_next = 0; {frame_len_lim_cyc_next, frame_len_lim_last_next} = cfg_tx_max_pkt_len; frame_len_lim_check_next = 1'b0; - reset_crc = 1'b1; // XGMII idle xgmii_txd_next = {CTRL_W{XGMII_IDLE}}; xgmii_txc_next = {CTRL_W{1'b1}}; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); m_axis_tx_cpl_tag_next = s_axis_tx.tid; @@ -440,14 +435,15 @@ always_comb begin end STATE_PREAMBLE: begin // send preamble - reset_crc = 1'b1; hdr_ptr_next = 0; frame_len_next = 0; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {24'd0, 32'hffffffff}; + xgmii_txd_next = {ETH_SFD, {3{ETH_PRE}}}; xgmii_txc_next = {CTRL_W{1'b0}}; @@ -457,13 +453,12 @@ always_comb begin end STATE_PAYLOAD: begin // transfer payload - update_crc = 1'b1; s_axis_tx_tready_next = 1'b1; xgmii_txd_next = s_tdata_reg; xgmii_txc_next = {CTRL_W{1'b0}}; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); stat_tx_byte_next = 3'(CTRL_W); @@ -501,8 +496,6 @@ always_comb begin stat_tx_byte_next = 3'(CTRL_W); - update_crc = 1'b1; - ifg_count_next = (cfg_tx_ifg > 8'd12 ? 
cfg_tx_ifg : 8'd12) - ifg_offset + 8'(deficit_idle_count_reg); if (frame_error_reg) begin state_next = STATE_ERR; @@ -648,6 +641,8 @@ always_ff @(posedge clk) begin s_tdata_reg <= s_tdata_next; s_empty_reg <= s_empty_next; + crc_data_reg <= crc_data_next; + s_axis_tx_tready_reg <= s_axis_tx_tready_next; m_axis_tx_cpl_ts_reg <= m_axis_tx_cpl_ts_next; @@ -657,17 +652,7 @@ always_ff @(posedge clk) begin if (GBX_IF_EN && tx_gbx_req_stall) begin // gearbox stall end else begin - for (integer i = 0; i < 3; i = i + 1) begin - crc_state_reg[i] <= crc_state[i]; - end - end - - if (update_crc) begin - crc_state_reg[3] <= crc_state[3]; - end - - if (reset_crc) begin - crc_state_reg[3] <= '1; + crc_state_reg <= crc_state; end xgmii_txd_reg <= xgmii_txd_next; diff --git a/src/eth/rtl/taxi_axis_xgmii_tx_64.sv b/src/eth/rtl/taxi_axis_xgmii_tx_64.sv index 3643d4d..7116800 100644 --- a/src/eth/rtl/taxi_axis_xgmii_tx_64.sv +++ b/src/eth/rtl/taxi_axis_xgmii_tx_64.sv @@ -1,7 +1,7 @@ // SPDX-License-Identifier: CERN-OHL-S-2.0 /* -Copyright (c) 2015-2025 FPGA Ninja, LLC +Copyright (c) 2015-2026 FPGA Ninja, LLC Authors: - Alex Forencich @@ -128,10 +128,6 @@ typedef enum logic [2:0] { state_t state_reg = STATE_IDLE, state_next; -// datapath control signals -logic reset_crc; -logic update_crc; - logic swap_lanes_reg = 1'b0, swap_lanes_next; logic [31:0] swap_txd_reg = 32'd0; logic [3:0] swap_txc_reg = 4'd0; @@ -172,9 +168,6 @@ logic m_axis_tx_cpl_valid_reg = 1'b0; logic m_axis_tx_cpl_valid_int_reg = 1'b0; logic m_axis_tx_cpl_ts_borrow_reg = 1'b0; -logic [31:0] crc_state_reg[8]; -wire [31:0] crc_state[8]; - logic [4+16-1:0] last_ts_reg = '0; logic [4+16-1:0] ts_inc_reg = '0; @@ -227,26 +220,28 @@ assign stat_tx_err_oversize = stat_tx_err_oversize_reg; assign stat_tx_err_user = stat_tx_err_user_reg; assign stat_tx_err_underflow = stat_tx_err_underflow_reg; -for (genvar n = 0; n < 8; n = n + 1) begin : crc +logic [DATA_W+24-1:0] crc_data_reg = '0, crc_data_next; /* zero-initialized like the 32-bit variants */ +logic [31:0] crc_state_reg = '0; 
+wire [31:0] crc_state; - taxi_lfsr #( - .LFSR_W(32), - .LFSR_POLY(32'h4c11db7), - .LFSR_GALOIS(1), - .LFSR_FEED_FORWARD(0), - .REVERSE(1), - .DATA_W(8*(n+1)), - .DATA_IN_EN(1'b1), - .DATA_OUT_EN(1'b0) - ) - eth_crc ( - .data_in(s_tdata_reg[0 +: 8*(n+1)]), - .state_in(crc_state_reg[7]), - .data_out(), - .state_out(crc_state[n]) - ); - -end +taxi_lfsr #( + .LFSR_W(32), + .LFSR_POLY(32'h4c11db7), + .LFSR_GALOIS(1), + .LFSR_FEED_FORWARD(0), + .REVERSE(1), + .DATA_W(DATA_W+24), + .DATA_IN_EN(1'b1), + .DATA_OUT_EN(1'b0), + .STATE_SHIFT_PRE(0), + .STATE_SHIFT_POST(-24) +) +eth_crc ( + .data_in(crc_data_reg), + .state_in('0), + .data_out(), + .state_out(crc_state) +); function [2:0] keep2empty(input [7:0] k); casez (k) @@ -262,68 +257,61 @@ function [2:0] keep2empty(input [7:0] k); endcase endfunction -// Mask input data -wire [DATA_W-1:0] s_axis_tx_tdata_masked; - -for (genvar n = 0; n < KEEP_W; n = n + 1) begin - assign s_axis_tx_tdata_masked[n*8 +: 8] = (n == 0 || s_axis_tx.tkeep[n]) ? s_axis_tx.tdata[n*8 +: 8] : 8'd0; -end - // FCS cycle calculation always_comb begin casez (s_empty_reg) 3'd7: begin - fcs_output_txd_0 = {{2{XGMII_IDLE}}, XGMII_TERM, ~crc_state[0][31:0], s_tdata_reg[7:0]}; + fcs_output_txd_0 = {{2{XGMII_IDLE}}, XGMII_TERM, ~crc_state[31:0], s_tdata_reg[7:0]}; fcs_output_txd_1 = {8{XGMII_IDLE}}; fcs_output_txc_0 = 8'b11100000; fcs_output_txc_1 = 8'b11111111; ifg_offset = 8'd3; end 3'd6: begin - fcs_output_txd_0 = {XGMII_IDLE, XGMII_TERM, ~crc_state[1][31:0], s_tdata_reg[15:0]}; + fcs_output_txd_0 = {XGMII_IDLE, XGMII_TERM, ~crc_state[31:0], s_tdata_reg[15:0]}; fcs_output_txd_1 = {8{XGMII_IDLE}}; fcs_output_txc_0 = 8'b11000000; fcs_output_txc_1 = 8'b11111111; ifg_offset = 8'd2; end 3'd5: begin - fcs_output_txd_0 = {XGMII_TERM, ~crc_state[2][31:0], s_tdata_reg[23:0]}; + fcs_output_txd_0 = {XGMII_TERM, ~crc_state[31:0], s_tdata_reg[23:0]}; fcs_output_txd_1 = {8{XGMII_IDLE}}; fcs_output_txc_0 = 8'b10000000; fcs_output_txc_1 = 8'b11111111; ifg_offset = 8'd1; 
end 3'd4: begin - fcs_output_txd_0 = {~crc_state[3][31:0], s_tdata_reg[31:0]}; + fcs_output_txd_0 = {~crc_state[31:0], s_tdata_reg[31:0]}; fcs_output_txd_1 = {{7{XGMII_IDLE}}, XGMII_TERM}; fcs_output_txc_0 = 8'b00000000; fcs_output_txc_1 = 8'b11111111; ifg_offset = 8'd8; end 3'd3: begin - fcs_output_txd_0 = {~crc_state[4][23:0], s_tdata_reg[39:0]}; - fcs_output_txd_1 = {{6{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[4][31:24]}; + fcs_output_txd_0 = {~crc_state[23:0], s_tdata_reg[39:0]}; + fcs_output_txd_1 = {{6{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[31:24]}; fcs_output_txc_0 = 8'b00000000; fcs_output_txc_1 = 8'b11111110; ifg_offset = 8'd7; end 3'd2: begin - fcs_output_txd_0 = {~crc_state[5][15:0], s_tdata_reg[47:0]}; - fcs_output_txd_1 = {{5{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[5][31:16]}; + fcs_output_txd_0 = {~crc_state[15:0], s_tdata_reg[47:0]}; + fcs_output_txd_1 = {{5{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[31:16]}; fcs_output_txc_0 = 8'b00000000; fcs_output_txc_1 = 8'b11111100; ifg_offset = 8'd6; end 3'd1: begin - fcs_output_txd_0 = {~crc_state[6][7:0], s_tdata_reg[55:0]}; - fcs_output_txd_1 = {{4{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[6][31:8]}; + fcs_output_txd_0 = {~crc_state[7:0], s_tdata_reg[55:0]}; + fcs_output_txd_1 = {{4{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[31:8]}; fcs_output_txc_0 = 8'b00000000; fcs_output_txc_1 = 8'b11111000; ifg_offset = 8'd5; end 3'd0: begin fcs_output_txd_0 = s_tdata_reg; - fcs_output_txd_1 = {{3{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[7][31:0]}; + fcs_output_txd_1 = {{3{XGMII_IDLE}}, XGMII_TERM, ~crc_state_reg[31:0]}; fcs_output_txc_0 = 8'b00000000; fcs_output_txc_1 = 8'b11110000; ifg_offset = 8'd4; @@ -334,9 +322,6 @@ end always_comb begin state_next = STATE_IDLE; - reset_crc = 1'b0; - update_crc = 1'b0; - swap_lanes_next = swap_lanes_reg; frame_start_next = 1'b0; @@ -361,6 +346,8 @@ always_comb begin s_tdata_next = s_tdata_reg; s_empty_next = s_empty_reg; + crc_data_next = crc_data_reg; + m_axis_tx_cpl_tag_next = 
m_axis_tx_cpl_tag_reg; // XGMII idle @@ -435,6 +422,18 @@ always_comb begin ifg_cnt_next = '0; end + // FCS: shift the valid tkeep bytes up by one byte per unused lane (low bytes zero-filled) and XOR crc_state in at the same shift; every tkeep bit above the first 0 is a don't-care + casez (s_axis_tx.tkeep) + 8'b11111111: crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, crc_state}; + 8'b01111111: crc_data_next = {24'd0, s_axis_tx.tdata[55:0], 8'd0} ^ {48'd0, crc_state, 8'd0}; + 8'bz0111111: crc_data_next = {24'd0, s_axis_tx.tdata[47:0], 16'd0} ^ {40'd0, crc_state, 16'd0}; + 8'bzz011111: crc_data_next = {24'd0, s_axis_tx.tdata[39:0], 24'd0} ^ {32'd0, crc_state, 24'd0}; + 8'bzzz01111: crc_data_next = {24'd0, s_axis_tx.tdata[31:0], 32'd0} ^ {24'd0, crc_state, 32'd0}; + 8'bzzzz0111: crc_data_next = {24'd0, s_axis_tx.tdata[23:0], 40'd0} ^ {16'd0, crc_state, 40'd0}; + 8'bzzzzz011: crc_data_next = {24'd0, s_axis_tx.tdata[15:0], 48'd0} ^ {8'd0, crc_state, 48'd0}; + default: crc_data_next = {24'd0, s_axis_tx.tdata[7:0], 56'd0} ^ {crc_state, 56'd0}; + endcase + case (state_reg) STATE_IDLE: begin // idle state - wait for data @@ -444,16 +443,17 @@ always_comb begin frame_len_next = 0; {frame_len_lim_cyc_next, frame_len_lim_last_next} = cfg_tx_max_pkt_len ^ 4; frame_len_lim_check_next = 1'b0; - reset_crc = 1'b1; s_axis_tx_tready_next = cfg_tx_enable; // XGMII idle xgmii_txd_next = {CTRL_W{XGMII_IDLE}}; xgmii_txc_next = {CTRL_W{1'b1}}; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, 32'hffffffff}; + m_axis_tx_cpl_tag_next = s_axis_tx.tid; if (s_axis_tx.tvalid && s_axis_tx.tready) begin @@ -472,13 +472,12 @@ always_comb begin end STATE_PAYLOAD: begin // transfer payload - update_crc = 1'b1; s_axis_tx_tready_next = 1'b1; xgmii_txd_next = s_tdata_reg; xgmii_txc_next = {CTRL_W{1'b0}}; - s_tdata_next = s_axis_tx_tdata_masked; + s_tdata_next = s_axis_tx.tdata; s_empty_next = keep2empty(s_axis_tx.tkeep); stat_tx_byte_next = 4'(CTRL_W); @@ -518,8 +517,6 @@ always_comb begin xgmii_txd_next = fcs_output_txd_0; xgmii_txc_next = 
fcs_output_txc_0; - update_crc = 1'b1; - ifg_count_next = (cfg_tx_ifg > 8'd12 ? cfg_tx_ifg : 8'd12) - ifg_offset + (swap_lanes_reg ? 8'd4 : 8'd0) + 8'(deficit_idle_count_reg); if (s_empty_reg <= 4) begin stat_tx_byte_next = 4'(CTRL_W); @@ -558,6 +555,8 @@ always_comb begin stat_tx_pkt_vlan_next = is_8021q_reg; stat_tx_err_oversize_next = frame_oversize_reg; + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, 32'hffffffff}; + if (DIC_EN) begin if (ifg_count_next > 8'd7) begin state_next = STATE_IFG; @@ -612,6 +611,8 @@ always_comb begin xgmii_txd_next = {CTRL_W{XGMII_IDLE}}; xgmii_txc_next = {CTRL_W{1'b1}}; + crc_data_next = {24'd0, s_axis_tx.tdata} ^ {56'd0, 32'hffffffff}; + if (ifg_count_reg > 8'd8) begin ifg_count_next = ifg_count_reg - 8'd8; end else begin @@ -676,6 +677,8 @@ always_ff @(posedge clk) begin s_tdata_reg <= s_tdata_next; s_empty_reg <= s_empty_next; + crc_data_reg <= crc_data_next; + s_axis_tx_tready_reg <= s_axis_tx_tready_next; m_axis_tx_cpl_tag_reg <= m_axis_tx_cpl_tag_next; @@ -740,17 +743,7 @@ always_ff @(posedge clk) begin end end - for (integer i = 0; i < 7; i = i + 1) begin - crc_state_reg[i] <= crc_state[i]; - end - - if (update_crc) begin - crc_state_reg[7] <= crc_state[7]; - end - - if (reset_crc) begin - crc_state_reg[7] <= '1; - end + crc_state_reg <= crc_state; swap_txd_reg <= xgmii_txd_next[63:32]; swap_txc_reg <= xgmii_txc_next[7:4];