76 lines
1.6 KiB
Systemverilog
76 lines
1.6 KiB
Systemverilog
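// ChaCha20 quarter-round, implemented as a clocked pipeline. The earlier version
// that did the entire quarter-round combinationally is kept, commented out, at
// the end of this file for reference.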
|
|
|
|
// ROTL(x, n): rotate the 32-bit value x left by n bit positions.
//   x - 32-bit operand; may be a plain signal or an arbitrary expression
//       (it is evaluated twice, so it must be free of side effects).
//   n - rotate amount, intended to be a constant in the range 1..31.
// Both arguments are fully parenthesised so that an expression such as
// (a ^ d) is rotated as a whole. The 32'(...) size cast pins the result to
// 32 bits regardless of the surrounding expression context.
`define ROTL(x, n) (32'(((x) << (n)) | ((x) >> (32 - (n)))))
|
|
|
|
|
|
// chacha20_qr: clocked pipeline for the ChaCha20 quarter-round on the four
// 32-bit words a, b, c, d.
//
// Implemented so far (each stage is one register level, enabled by i_ready):
//   stage 0: a = a + b
//   stage 1: d = ROTL(a ^ d, 16)
//
// NOTE(review): this module is unfinished. o_valid and a_o/b_o/c_o/d_o are not
// driven anywhere, valid_sr is cleared on reset but never shifted, i_valid is
// unused, and PIPELINE_STAGES is unused (the stage arrays and valid_sr are
// hard-coded to 7 entries). The remaining quarter-round steps still have to be
// added before the outputs are meaningful.
//
// Ports:
//   i_clk, i_rst         - clock and synchronous reset (reset only clears valid_sr)
//   i_valid / o_ready    - input-side handshake
//   a_i, b_i, c_i, d_i   - input words
//   o_valid / i_ready    - output-side handshake
//   a_o, b_o, c_o, d_o   - output words
module chacha20_qr #(
    parameter PIPELINE_STAGES=7
)(
    input i_clk,
    input i_rst,

    input i_valid,
    output o_ready,
    input logic [31:0] a_i, b_i, c_i, d_i,

    output o_valid,
    input i_ready,
    output logic [31:0] a_o, b_o, c_o, d_o
);

    // Per-stage pipeline registers for the four words.
    logic [31:0] a_int [7];
    logic [31:0] b_int [7];
    logic [31:0] c_int [7];
    logic [31:0] d_int [7];

    // Valid bits travelling alongside the data (currently only reset).
    logic [6:0] valid_sr;

    // Stage-1 XOR as a named signal, so that the rotate below is applied to
    // the complete (a ^ d) value rather than to an inline expression.
    logic [31:0] d_xor_a;
    assign d_xor_a = a_int[0] ^ d_int[0];

    // An output stage elsewhere in the design isolates this core from
    // downstream backpressure, so no per-stage handshaking is needed here:
    // i_ready is used as a single enable for the whole pipeline and is passed
    // straight through as o_ready.
    assign o_ready = i_ready;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            valid_sr <= '0;
        end else begin
            if (i_ready) begin
                // 1. Update A: a += b
                a_int[0] <= a_i + b_i;
                b_int[0] <= b_i;
                c_int[0] <= c_i;
                d_int[0] <= d_i;

                // 2. Update D: d = (d ^ a) <<< 16
                a_int[1] <= a_int[0];
                b_int[1] <= b_int[0];
                c_int[1] <= c_int[0];
                d_int[1] <= `ROTL(d_xor_a, 16);
            end
        end
    end

endmodule
|
|
|
|
|
|
// always_comb begin
|
|
// a_int_0 = a_i + b_i;
|
|
// d_int_0 = a_int_0 ^ d_i;
|
|
// d_int_1 = `ROTL(d_int_0, 16);
|
|
// c_int_0 = c_i + d_int_1;
|
|
// b_int_0 = c_int_0 ^ b_i;
|
|
// b_int_1 = `ROTL(b_int_0, 12);
|
|
// a_o = a_int_0 + b_int_1;
|
|
// d_int_2 = d_int_1 ^ a_o;
|
|
// d_o = `ROTL(d_int_2, 8);
|
|
// c_o = c_int_0 + d_o;
|
|
// b_int_2 = b_int_1 ^ c_o;
|
|
// b_o = `ROTL(b_int_2, 7);
|
|
// end
|
|
|
|
// endmodule
|