Files
crypto/ChaCha20_Poly1305_64/src/chacha20_qr.sv
2025-06-28 15:48:14 -07:00

97 lines
2.4 KiB
Systemverilog

// Pipelined ChaCha20 quarter round.
//
// Computes, one primitive operation per register stage:
//   a += b; d ^= a; d <<<= 16;
//   c += d; b ^= c; b <<<= 12;
//   a += b; d ^= a; d <<<= 8;
//   c += d; b ^= c; b <<<= 7;
//
// Latency is 8 enabled clock edges (7 internal stages plus the output
// registers). The whole pipeline advances only while i_ready is high.

// Rotate the 32-bit vector x left by the constant n (0 < n < 32).
// x must be a plain signal/array element so that part-selects are legal.
`define ROTL(x, n) {x[31-n:0], x[31:32-n]}

module chacha20_qr #(
    // NOTE(review): this parameter is not referenced anywhere in the module;
    // the pipeline depth is fixed by the stage structure below. It is kept so
    // existing instantiations that override it continue to elaborate.
    parameter PIPELINE_STAGES=7
)(
    input  logic        i_clk,
    input  logic        i_rst,      // synchronous, active high

    // Input side
    input  logic        i_valid,    // a_i..d_i carry a valid word
    output logic        o_ready,    // mirrors i_ready (see below)
    input  logic [31:0] a_i, b_i, c_i, d_i,

    // Output side
    output logic        o_valid,    // a_o..d_o carry a valid result
    input  logic        i_ready,    // global pipeline enable from downstream
    output logic [31:0] a_o, b_o, c_o, d_o
);

    // Per-stage working state. Index k holds the values after step k+1.
    logic [31:0] a_int [7];
    logic [31:0] b_int [7];
    logic [31:0] c_int [7];
    logic [31:0] d_int [7];

    // Valid bit travelling alongside the data through the 7 internal stages.
    logic [6:0] valid_sr;

    // There is an output stage which handles isolating backpressure from the
    // rest of the design from the core, so we don't need to worry about it
    // here; a single signal (i_ready) gates the entire pipeline.
    assign o_ready = i_ready;

    // ------------------------------------------------------------------
    // Control path: valid tracking. These are the only registers that are
    // reset. o_valid is cleared together with valid_sr so that no stale or
    // unknown "valid" is presented downstream after a reset.
    // ------------------------------------------------------------------
    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            valid_sr <= '0;
            o_valid  <= 1'b0;
        end else if (i_ready) begin
            valid_sr <= {valid_sr[5:0], i_valid};
            o_valid  <= valid_sr[6];
        end
    end

    // ------------------------------------------------------------------
    // Data path: no reset. Contents are only meaningful where the matching
    // valid bit is set, so the registers are simply enabled by i_ready.
    // ------------------------------------------------------------------
    always_ff @(posedge i_clk) begin
        if (i_ready) begin
            // 1. a += b
            a_int[0] <= a_i + b_i;
            b_int[0] <= b_i;
            c_int[0] <= c_i;
            d_int[0] <= d_i;

            // 2. d = (d ^ a) <<< 16  (rotation distributes over XOR)
            a_int[1] <= a_int[0];
            b_int[1] <= b_int[0];
            c_int[1] <= c_int[0];
            d_int[1] <= `ROTL(a_int[0], 16) ^ `ROTL(d_int[0], 16);

            // 3. c += d
            a_int[2] <= a_int[1];
            b_int[2] <= b_int[1];
            c_int[2] <= c_int[1] + d_int[1];
            d_int[2] <= d_int[1];

            // 4. b = (b ^ c) <<< 12
            a_int[3] <= a_int[2];
            b_int[3] <= `ROTL(b_int[2], 12) ^ `ROTL(c_int[2], 12);
            c_int[3] <= c_int[2];
            d_int[3] <= d_int[2];

            // 5. a += b
            a_int[4] <= a_int[3] + b_int[3];
            b_int[4] <= b_int[3];
            c_int[4] <= c_int[3];
            d_int[4] <= d_int[3];

            // 6. d = (d ^ a) <<< 8
            a_int[5] <= a_int[4];
            b_int[5] <= b_int[4];
            c_int[5] <= c_int[4];
            d_int[5] <= `ROTL(a_int[4], 8) ^ `ROTL(d_int[4], 8);

            // 7. c += d
            a_int[6] <= a_int[5];
            b_int[6] <= b_int[5];
            c_int[6] <= c_int[5] + d_int[5];
            d_int[6] <= d_int[5];

            // 8. b = (b ^ c) <<< 7, registered straight onto the outputs
            a_o <= a_int[6];
            b_o <= `ROTL(b_int[6], 7) ^ `ROTL(c_int[6], 7);
            c_o <= c_int[6];
            d_o <= d_int[6];
        end
    end

endmodule