Add basic repo

This commit is contained in:
Byron Lathi
2025-06-28 15:48:14 -07:00
parent 369e29557c
commit 8136a7526b
21 changed files with 1069 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
// ChaCha20 block function: builds the 16-word initial state from the constant,
// key, counter and nonce, runs it through 20 pipelined rounds (10 column/diagonal
// double rounds, four chacha20_qr instances per round) and adds the initial
// state back to the round output.
//
// Handshake / flow control:
//   * i_ready is the downstream ready AND the global clock enable of the whole
//     pipeline (every chacha20_qr register is gated by it). o_ready mirrors it.
//   * An input is accepted on a clock edge where i_valid && o_ready.
//   * o_state/o_valid form the last pipeline stage and are gated by i_ready as
//     well, so a result is held while the pipeline is stalled and is presented
//     exactly once per accepted input.
//
// Latency: NUM_ROUNDS * QR_LATENCY enabled cycles through the rounds plus one
// enabled cycle for the final addition.
module chacha20_block #(
    parameter KEY_SIZE = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE = 64,
    parameter STATE_SIZE = 512,
    // ASCII "expand 32-byte k" written as a big-endian byte string.
    parameter CONSTANT = 128'h657870616e642033322d62797465206b
)(
    input  logic                    i_clk,
    input  logic                    i_rst,
    input  logic [KEY_SIZE-1:0]     i_key,
    input  logic [COUNTER_SIZE-1:0] i_counter,
    input  logic [NONCE_SIZE-1:0]   i_nonce,
    input  logic                    i_valid,
    output logic                    o_ready,
    output logic [STATE_SIZE-1:0]   o_state,
    output logic                    o_valid,
    input  logic                    i_ready
);

// Instantiates one quarter round operating on words a/b/c/d of state[i] and
// producing the same words of state[i+1]; n selects the valid lane.
`define QR(name, i, n, a, b, c, d) \
chacha20_qr u_chacha20_``name ( \
    .i_clk (i_clk), \
    .i_rst (i_rst), \
\
    .i_valid (valid[i][n]), \
    .o_ready (), \
    .a_i (state[i][a]), \
    .b_i (state[i][b]), \
    .c_i (state[i][c]), \
    .d_i (state[i][d]), \
\
    .o_valid (valid[i+1][n]), \
    .i_ready (i_ready), \
    .a_o (state[i+1][a]), \
    .b_o (state[i+1][b]), \
    .c_o (state[i+1][c]), \
    .d_o (state[i+1][d]) \
)

localparam int NUM_ROUNDS   = 20;
// Enabled clock cycles from chacha20_qr inputs to its outputs
// (seven intermediate register stages plus the output register).
localparam int QR_LATENCY   = 8;
// Enabled clock cycles from state[0] to state[NUM_ROUNDS].
localparam int PIPE_LATENCY = NUM_ROUNDS * QR_LATENCY;
// The initial-state memory must hold one entry per pipeline step, so its depth
// is PIPE_LATENCY rounded up to a power of two (pointers then wrap naturally).
localparam int FIFO_AW      = $clog2(PIPE_LATENCY);
localparam int FIFO_DEPTH   = 1 << FIFO_AW;

logic [31:0] state [NUM_ROUNDS+1][16];
logic [3:0]  valid [NUM_ROUNDS+1];

// Delay line for the initial state, kept in a memory rather than in flops.
// The pipeline has a fixed latency in enabled cycles, so the memory is
// addressed by a free-running step counter: the write pointer advances on
// every enabled cycle and the read pointer trails it by PIPE_LATENCY-1 steps.
// The entry read on a given step is therefore always the initial state of the
// block that is currently sitting in state[NUM_ROUNDS] one step later, when
// the final addition is registered.
logic [FIFO_AW-1:0] initial_state_wptr;
logic [FIFO_AW-1:0] initial_state_rptr;
logic [511:0]       initial_states [FIFO_DEPTH];
logic [511:0]       write_initial_state, read_initial_state;

// Step counter and output valid (the only state that needs a reset).
always_ff @(posedge i_clk) begin
    if (i_rst) begin
        initial_state_wptr <= '0;
        o_valid            <= 1'b0;
    end else if (i_ready) begin
        initial_state_wptr <= initial_state_wptr + 1'b1;
        o_valid            <= &valid[NUM_ROUNDS];
    end
end

// Initial-state memory (synchronous write, registered read) and the final
// "add the initial state" stage. Everything is gated by i_ready so it moves in
// lock step with the quarter-round pipeline. No reset: contents are only
// meaningful when the accompanying valid is set.
always_ff @(posedge i_clk) begin
    if (i_ready) begin
        if (i_valid) begin
            initial_states[initial_state_wptr] <= write_initial_state;
        end
        read_initial_state <= initial_states[initial_state_rptr];

        for (int i = 0; i < 16; i++) begin
            o_state[i*32 +: 32] <= state[NUM_ROUNDS][i] + read_initial_state[i*32 +: 32];
        end
    end
end

always_comb begin
    // CONSTANT is a big-endian byte string, while ChaCha20 state words are
    // little-endian integers: take the words most-significant first and swap
    // the bytes inside each word (word 0 becomes 32'h61707865 for the default).
    for (int i = 0; i < 4; i++) begin
        state[0][i] = {CONSTANT[32*(3-i)      +: 8],
                       CONSTANT[32*(3-i) + 8  +: 8],
                       CONSTANT[32*(3-i) + 16 +: 8],
                       CONSTANT[32*(3-i) + 24 +: 8]};
    end

    // Words 4..11: key, least-significant word first.
    for (int i = 0; i < 8; i++) begin
        state[0][i+4] = i_key[32*i +: 32];
    end

    // Words 12..13: block counter, words 14..15: nonce.
    state[0][12] = i_counter[0 +: 32];
    state[0][13] = i_counter[32 +: 32];
    state[0][14] = i_nonce[0 +: 32];
    state[0][15] = i_nonce[32 +: 32];

    // All four quarter rounds of the first round start together.
    for (int i = 0; i < 4; i++) begin
        valid[0][i] = i_valid;
    end

    // No internal buffering: the pipeline accepts data whenever it advances.
    o_ready = i_ready;

    // Flatten the initial state for storage in the delay-line memory.
    for (int i = 0; i < 16; i++) begin
        write_initial_state[i*32 +: 32] = state[0][i];
    end

    // Read address trails the write address by PIPE_LATENCY-1 steps (mod depth).
    initial_state_rptr = initial_state_wptr - FIFO_AW'(PIPE_LATENCY - 1);
end

generate
    // Each iteration is one double round: a column round followed by a
    // diagonal round.
    for (genvar round = 0; round < NUM_ROUNDS; round+=2) begin : ROUND_LOOP
        `QR(0, round, 0, 0, 4, 8, 12);
        `QR(1, round, 1, 1, 5, 9, 13);
        `QR(2, round, 2, 2, 6, 10, 14);
        `QR(3, round, 3, 3, 7, 11, 15);
        `QR(4, round+1, 0, 0, 5, 10, 15);
        `QR(5, round+1, 1, 1, 6, 11, 12);
        `QR(6, round+1, 2, 2, 7, 8, 13);
        `QR(7, round+1, 3, 3, 4, 9, 14);
    end
endgenerate

// The instantiation macro references module-local signals; do not let it leak
// into files compiled after this one.
`undef QR
endmodule

View File

@@ -0,0 +1,96 @@
// Pipelined ChaCha20 quarter round.
//
// The four add/xor/rotate steps of the quarter round are each split into two
// register stages (add, then xor+rotate), giving 8 register stages in total.
// Every register is enabled by i_ready, so results appear on a_o..d_o (with
// o_valid) 8 enabled clock cycles after the operands were presented with
// i_valid.

// Rotate the 32-bit vector x left by the constant n (0 < n < 32).
`define ROTL(x, n) {x[31-n:0], x[31:32-n]}

module chacha20_qr #(
    // NOTE: not referenced by the logic below; the stage count is fixed by the
    // hand-written pipeline (7 intermediate stages + 1 output stage).
    parameter PIPELINE_STAGES=7
)(
    input  logic        i_clk,
    input  logic        i_rst,

    // Upstream side
    input  logic        i_valid,
    output logic        o_ready,
    input  logic [31:0] a_i, b_i, c_i, d_i,

    // Downstream side
    output logic        o_valid,
    input  logic        i_ready,
    output logic [31:0] a_o, b_o, c_o, d_o
);

// Intermediate values after pipeline stages 1..7.
logic [31:0] a_int [7];
logic [31:0] b_int [7];
logic [31:0] c_int [7];
logic [31:0] d_int [7];

// valid_sr[k] flags that a_int[k]..d_int[k] hold data from a valid input.
logic [6:0] valid_sr;

// There is an output stage which isolates backpressure in the rest of the
// design from the core, so it does not need per-stage handling here: a single
// signal (i_ready) gates the whole pipeline.
assign o_ready = i_ready;

always_ff @(posedge i_clk) begin
    if (i_rst) begin
        valid_sr <= '0;
        // o_valid must be cleared as well: otherwise it is undefined (or
        // stale) until the first enabled cycle after reset and would be
        // sampled as a bogus valid by whatever consumes it.
        o_valid  <= 1'b0;
    end else if (i_ready) begin
        // 1. a += b
        a_int[0] <= a_i + b_i;
        b_int[0] <= b_i;
        c_int[0] <= c_i;
        d_int[0] <= d_i;

        // 2. d = (d ^ a) <<< 16
        a_int[1] <= a_int[0];
        b_int[1] <= b_int[0];
        c_int[1] <= c_int[0];
        d_int[1] <= `ROTL(a_int[0], 16) ^ `ROTL(d_int[0], 16);

        // 3. c += d
        a_int[2] <= a_int[1];
        b_int[2] <= b_int[1];
        c_int[2] <= c_int[1] + d_int[1];
        d_int[2] <= d_int[1];

        // 4. b = (b ^ c) <<< 12
        a_int[3] <= a_int[2];
        b_int[3] <= `ROTL(b_int[2], 12) ^ `ROTL(c_int[2], 12);
        c_int[3] <= c_int[2];
        d_int[3] <= d_int[2];

        // 5. a += b
        a_int[4] <= a_int[3] + b_int[3];
        b_int[4] <= b_int[3];
        c_int[4] <= c_int[3];
        d_int[4] <= d_int[3];

        // 6. d = (d ^ a) <<< 8
        a_int[5] <= a_int[4];
        b_int[5] <= b_int[4];
        c_int[5] <= c_int[4];
        d_int[5] <= `ROTL(a_int[4], 8) ^ `ROTL(d_int[4], 8);

        // 7. c += d
        a_int[6] <= a_int[5];
        b_int[6] <= b_int[5];
        c_int[6] <= c_int[5] + d_int[5];
        d_int[6] <= d_int[5];

        // 8. b = (b ^ c) <<< 7 (output registers)
        a_o <= a_int[6];
        b_o <= `ROTL(b_int[6], 7) ^ `ROTL(c_int[6], 7);
        c_o <= c_int[6];
        d_o <= d_int[6];

        // The valid flag travels alongside the data, one bit per stage.
        valid_sr <= {valid_sr[5:0], i_valid};
        o_valid  <= valid_sr[6];
    end
end

endmodule

View File

@@ -0,0 +1,2 @@
chacha20_qr.sv
chacha20_block.sv