97 lines
2.4 KiB
Systemverilog
97 lines
2.4 KiB
Systemverilog
// ChaCha20 quarter round, pipelined: each add or xor-and-rotate step is registered in its own stage
|
|
|
|
// ROTL(x, n): rotate the 32-bit vector x left by n bit positions by
// concatenating its low (32-n) bits above its high n bits.
// x is part-selected directly, so it must be a signal or array element, not
// an arbitrary expression. n must be a constant in 1..31 so that both
// part-selects have legal bounds.
`define ROTL(x, n) {x[31-n:0], x[31:32-n]}
|
|
|
|
|
|
// chacha20_qr: registered (pipelined) implementation of the ChaCha20 quarter
// round on the four 32-bit words a, b, c, d.
//
// The eight steps of the quarter round (four additions and four
// xor-and-rotate operations) each get their own register stage: seven
// intermediate stages followed by the output registers. A valid flag is
// shifted alongside the data so that o_valid lines up with a_o..d_o.
//
// Parameters:
//   PIPELINE_STAGES - NOTE(review): not referenced by the implementation; the
//                     pipeline depth is fixed by the stage logic below. Kept
//                     so existing instantiations continue to elaborate.
//
// Ports:
//   i_clk            - clock; every register updates on its rising edge.
//   i_rst            - synchronous, active-high reset. Clears the valid flags
//                      (valid_sr and o_valid); data registers are not cleared
//                      and simply hold while reset is asserted.
//   i_valid          - marks a_i..d_i as a real operand set this cycle.
//   o_ready          - combinational copy of i_ready.
//   a_i..d_i         - quarter-round input words.
//   o_valid          - registered flag accompanying a_o..d_o.
//   i_ready          - pipeline enable: when low, every stage holds its value.
//   a_o..d_o         - quarter-round output words (registered).
module chacha20_qr #(
    parameter PIPELINE_STAGES=7
)(
    input  logic        i_clk,
    input  logic        i_rst,

    input  logic        i_valid,
    output logic        o_ready,
    input  logic [31:0] a_i, b_i, c_i, d_i,

    output logic        o_valid,
    input  logic        i_ready,
    output logic [31:0] a_o, b_o, c_o, d_o
);

    // Number of intermediate register stages that sit in front of the output
    // registers. The stage logic below is written out explicitly for this
    // depth, so this is a naming aid rather than a tunable.
    localparam int INT_STAGES = 7;

    logic [31:0] a_int [INT_STAGES];
    logic [31:0] b_int [INT_STAGES];
    logic [31:0] c_int [INT_STAGES];
    logic [31:0] d_int [INT_STAGES];

    // valid_sr[k] travels with {a,b,c,d}_int[k].
    logic [INT_STAGES-1:0] valid_sr;

    // There is an output stage which handles isolating backpressure from the
    // rest of the design from the core, so we don't need to worry about it
    // here; a single signal gates the whole pipeline.
    assign o_ready = i_ready;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            valid_sr <= '0;
            // Clear the output flag as well. Without this, o_valid would keep
            // its pre-reset value (X in simulation) until the first cycle in
            // which i_ready is high, and a stale result could be accepted.
            o_valid  <= 1'b0;
        end else begin
            if (i_ready) begin
                // 1. Update A: a += b
                a_int[0] <= a_i + b_i;
                b_int[0] <= b_i;
                c_int[0] <= c_i;
                d_int[0] <= d_i;

                // 2. Update D: d = rotl(d ^ a, 16)
                // Rotation distributes over xor, so each operand is rotated
                // on its own (the macro needs a part-selectable operand).
                a_int[1] <= a_int[0];
                b_int[1] <= b_int[0];
                c_int[1] <= c_int[0];
                d_int[1] <= `ROTL(a_int[0], 16) ^ `ROTL(d_int[0], 16);

                // 3. Update C: c += d
                a_int[2] <= a_int[1];
                b_int[2] <= b_int[1];
                c_int[2] <= c_int[1] + d_int[1];
                d_int[2] <= d_int[1];

                // 4. Update B: b = rotl(b ^ c, 12)
                a_int[3] <= a_int[2];
                b_int[3] <= `ROTL(b_int[2], 12) ^ `ROTL(c_int[2], 12);
                c_int[3] <= c_int[2];
                d_int[3] <= d_int[2];

                // 5. Update A: a += b
                a_int[4] <= a_int[3] + b_int[3];
                b_int[4] <= b_int[3];
                c_int[4] <= c_int[3];
                d_int[4] <= d_int[3];

                // 6. Update D: d = rotl(d ^ a, 8)
                a_int[5] <= a_int[4];
                b_int[5] <= b_int[4];
                c_int[5] <= c_int[4];
                d_int[5] <= `ROTL(a_int[4], 8) ^ `ROTL(d_int[4], 8);

                // 7. Update C: c += d
                a_int[6] <= a_int[5];
                b_int[6] <= b_int[5];
                c_int[6] <= c_int[5] + d_int[5];
                d_int[6] <= d_int[5];

                // 8. Update B: b = rotl(b ^ c, 7), written to the output
                // registers.
                a_o <= a_int[6];
                b_o <= `ROTL(b_int[6], 7) ^ `ROTL(c_int[6], 7);
                c_o <= c_int[6];
                d_o <= d_int[6];

                // Simultaneously, advance the valid flags by one stage so
                // they stay aligned with the data they describe.
                valid_sr <= {valid_sr[INT_STAGES-2:0], i_valid};
                o_valid  <= valid_sr[INT_STAGES-1];
            end
        end
    end

endmodule
|