// ChaCha20 quarter round, implemented as a 7-register-stage pipeline.
//
// Each register stage performs one step of the quarter round (an add, or an
// xor followed by a rotate). The final xor/rotate that produces b_o is done
// combinationally on the outputs of the last register stage.

// Rotate a 32-bit value left by n bits (0 < n < 32).
//
// Written with shifts rather than part-selects so that `x` may be an arbitrary
// expression. A part-select form such as {x[31-n:0], x[31:32-n]} breaks under
// macro text substitution when x is e.g. `a ^ d`: the select then binds to `d`
// only, and the result is no longer a rotate of the xor.
`define ROTL(x, n) (32'((x) << (n)) | 32'((x) >> (32 - (n))))

module chacha20_qr #(
    // NOTE: kept for interface compatibility; the pipeline depth below is
    // fixed at 7 register stages and does not currently depend on this value.
    parameter PIPELINE_STAGES = 7
) (
    input               i_clk,
    input               i_rst,    // synchronous reset, clears the valid shift register

    // Input side
    input               i_valid,  // a_i..d_i hold a valid word set this cycle
    output              o_ready,  // mirrors i_ready (single global enable)
    input  logic [31:0] a_i, b_i, c_i, d_i,

    // Output side
    output              o_valid,  // a_o..d_o hold a valid result
    input               i_ready,  // advance the pipeline when high
    output logic [31:0] a_o, b_o, c_o, d_o
);

  // Number of register stages in the pipeline.
  localparam int STAGES = 7;

  logic [31:0] a_int [STAGES];
  logic [31:0] b_int [STAGES];
  logic [31:0] c_int [STAGES];
  logic [31:0] d_int [STAGES];

  // valid_sr[k] is set when stage k holds data derived from a valid input.
  logic [STAGES-1:0] valid_sr;

  // There is an output stage which handles isolating backpressure from the rest
  // of the design from the core, so we don't need to worry about it here, we can
  // have a single signal gate all of this.
  assign o_ready = i_ready;

  assign o_valid = valid_sr[STAGES-1];

  // a, c and d are final after the last register stage.
  assign a_o = a_int[STAGES-1];
  assign c_o = c_int[STAGES-1];
  assign d_o = d_int[STAGES-1];

  // 8. Update B (final step, combinational on the last stage's registers).
  assign b_o = `ROTL(b_int[STAGES-1] ^ c_int[STAGES-1], 7);

  always_ff @(posedge i_clk) begin
    if (i_rst) begin
      valid_sr <= '0;
    end else begin
      if (i_ready) begin
        // Valid bit travels alongside the data, one stage per enabled cycle.
        valid_sr <= {valid_sr[STAGES-2:0], i_valid};

        // 1. Update A: a += b
        a_int[0] <= a_i + b_i;
        b_int[0] <= b_i;
        c_int[0] <= c_i;
        d_int[0] <= d_i;

        // 2. Update D: d = (d ^ a) <<< 16
        a_int[1] <= a_int[0];
        b_int[1] <= b_int[0];
        c_int[1] <= c_int[0];
        d_int[1] <= `ROTL(a_int[0] ^ d_int[0], 16);

        // 3. Update C: c += d
        a_int[2] <= a_int[1];
        b_int[2] <= b_int[1];
        c_int[2] <= c_int[1] + d_int[1];
        d_int[2] <= d_int[1];

        // 4. Update B: b = (b ^ c) <<< 12
        a_int[3] <= a_int[2];
        b_int[3] <= `ROTL(b_int[2] ^ c_int[2], 12);
        c_int[3] <= c_int[2];
        d_int[3] <= d_int[2];

        // 5. Update A: a += b
        a_int[4] <= a_int[3] + b_int[3];
        b_int[4] <= b_int[3];
        c_int[4] <= c_int[3];
        d_int[4] <= d_int[3];

        // 6. Update D: d = (d ^ a) <<< 8
        a_int[5] <= a_int[4];
        b_int[5] <= b_int[4];
        c_int[5] <= c_int[4];
        d_int[5] <= `ROTL(a_int[4] ^ d_int[4], 8);

        // 7. Update C: c += d
        a_int[6] <= a_int[5];
        b_int[6] <= b_int[5];
        c_int[6] <= c_int[5] + d_int[5];
        d_int[6] <= d_int[5];
      end
    end
  end

endmodule

// Reference: the same quarter round written fully combinationally (disabled).
//
// always_comb begin
//   a_int_0 = a_i + b_i;
//   d_int_0 = a_int_0 ^ d_i;
//   d_int_1 = `ROTL(d_int_0, 16);
//   c_int_0 = c_i + d_int_1;
//   b_int_0 = c_int_0 ^ b_i;
//   b_int_1 = `ROTL(b_int_0, 12);
//   a_o = a_int_0 + b_int_1;
//   d_int_2 = d_int_1 ^ a_o;
//   d_o = `ROTL(d_int_2, 8);
//   c_o = c_int_0 + d_o;
//   b_int_2 = b_int_1 ^ c_o;
//   b_o = `ROTL(b_int_2, 7);
// end
// endmodule