First shot at 1/4 version

This commit is contained in:
Byron Lathi
2025-07-02 06:32:58 -07:00
parent 196ea8e6d3
commit a617277005
8 changed files with 733 additions and 2 deletions

View File

@@ -0,0 +1,137 @@
// ChaCha20 block function built from a chain of ROUNDS `chacha20_pipelined_round`
// instances (alternating IS_EVEN=0 / IS_EVEN=1), followed by the final word-wise
// addition of the initial state.
//
// Ports:
//   i_clk, i_rst        - clock and synchronous, active-high reset
//   i_key/i_counter/    - inputs used to build the 16-word initial state
//   i_nonce
//   i_valid / o_ready   - input handshake (o_ready comes from the first round)
//   o_state / o_valid   - result and its valid pulse
//   i_ready             - downstream ready; gates the hand-off out of the last round
//
// NOTE(review): the generate loop steps by 2, so ROUNDS is assumed to be even.
module chacha20_pipelined_block #(
    parameter KEY_SIZE     = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE   = 64,
    parameter STATE_SIZE   = 512,
    parameter ROUNDS       = 20,
    parameter CONSTANT     = 128'h657870616e642033322d62797465206b
)(
    input  logic                    i_clk,
    input  logic                    i_rst,
    input  logic [KEY_SIZE-1:0]     i_key,
    input  logic [COUNTER_SIZE-1:0] i_counter,
    input  logic [NONCE_SIZE-1:0]   i_nonce,
    input  logic                    i_valid,
    output logic                    o_ready,
    output logic [STATE_SIZE-1:0]   o_state,
    output logic                    o_valid,
    input  logic                    i_ready
);
    // each round is 8 stages
    localparam PIPE_STAGES = ROUNDS * 8;

    // Per-round interconnect: index r is the input of round r, index ROUNDS is
    // the output of the last round.
    logic [511:0] state [ROUNDS+1];
    logic         valid [ROUNDS+1];
    logic         ready [ROUNDS+1];

    // small fifo for storing the initial state.
    // better to store it in a memory than in flops
    logic [$clog2(PIPE_STAGES)-1:0] initial_state_wptr;
    logic [$clog2(PIPE_STAGES)-1:0] initial_state_rptr;
    logic [511:0]                   initial_states [PIPE_STAGES];

    logic [511:0] state_pre_add;
    logic         pre_add_valid;
    logic [511:0] write_initial_state, read_initial_state;
    logic [511:0] original_initial_state;

    // Accepted transfers at both ends of the round chain. The FIFO must be
    // pushed/popped exactly once per block, so it is keyed on the handshake
    // rather than on valid alone (valid may be held for several cycles while
    // the neighbouring stage is busy).
    logic in_xfer;
    logic out_xfer;

    assign in_xfer  = i_valid && ready[0];
    assign out_xfer = valid[ROUNDS] && ready[ROUNDS];

    // The last round's i_ready is ready[ROUNDS]; drive it from the module's
    // i_ready port so the final round can leave its output state.
    // NOTE(review): o_valid is a pulse two cycles after this handshake and the
    // output registers do not hold for back-pressure; the consumer must be able
    // to take the word once it has granted i_ready.
    assign ready[ROUNDS] = i_ready;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            initial_state_rptr <= '0;
            initial_state_wptr <= '0;
            pre_add_valid      <= 1'b0;
            o_valid            <= 1'b0;
        end else begin
            // Push the initial state when the first round accepts it.
            if (in_xfer) begin
                initial_states[initial_state_wptr] <= write_initial_state;
                if (initial_state_wptr < PIPE_STAGES-1) begin
                    initial_state_wptr <= initial_state_wptr + 1;
                end else begin
                    initial_state_wptr <= '0;
                end
            end

            // Pop the matching initial state when the last round hands off.
            pre_add_valid <= out_xfer;
            if (out_xfer) begin
                read_initial_state <= initial_states[initial_state_rptr];
                if (initial_state_rptr < PIPE_STAGES-1) begin
                    initial_state_rptr <= initial_state_rptr + 1;
                end else begin
                    initial_state_rptr <= '0;
                end
                state_pre_add <= state[ROUNDS];
            end

            o_valid <= pre_add_valid;

            // We cannot just add state_pre_add and read_initial state
            // because the addition needs to be done wordwise, with no
            // carries between 32 bit groups.
            for (int i = 0; i < 16; i++) begin
                o_state[i*32 +: 32] <= state_pre_add[i*32 +: 32] + read_initial_state[i*32 +: 32];
            end
        end
    end

    // Build the initial state: words 0-3 constant, 4-11 key, 12-13 counter,
    // 14-15 nonce.
    always_comb begin
        // NOTE(review): both sides use the same slice index, so CONSTANT is
        // copied as-is with no word or byte reordering, even though it is
        // described as big endian. Confirm word 0 is the intended value
        // (RFC 8439 lists 0x61707865 for word 0).
        for (int i = 0; i < 4; i++) begin
            state[0][32*(3-i) +: 32] = CONSTANT[32*(3-i) +: 32]; // constant is big endian
        end

        for (int i = 0; i < 8; i++) begin
            state[0][32*(i+4) +: 32] = i_key[32*i +: 32];
        end

        state[0][12*32 +: 32] = i_counter[0 +: 32];
        state[0][13*32 +: 32] = i_counter[32 +: 32];

        state[0][14*32 +: 32] = i_nonce[0 +: 32];
        state[0][15*32 +: 32] = i_nonce[32 +: 32];

        valid[0] = i_valid;
        o_ready  = ready[0];

        write_initial_state    = state[0];
        original_initial_state = read_initial_state;
    end

    // Two rounds per iteration: an IS_EVEN=0 round followed by an IS_EVEN=1 round.
    generate
        for (genvar round = 0; round < ROUNDS; round+=2) begin : ROUND_LOOP
            chacha20_pipelined_round #(.IS_EVEN(0)) chacha20_pipelined_round_inst_odd (
                .i_clk   (i_clk),
                .i_rst   (i_rst),

                .i_state (state[round]),
                .i_valid (valid[round]),
                .o_ready (ready[round]),

                .o_state (state[round+1]),
                .o_valid (valid[round+1]),
                .i_ready (ready[round+1])
            );

            chacha20_pipelined_round #(.IS_EVEN(1)) chacha20_pipelined_round_inst_even (
                .i_clk   (i_clk),
                .i_rst   (i_rst),

                .i_state (state[round+1]),
                .i_valid (valid[round+1]),
                .o_ready (ready[round+1]),

                .o_state (state[round+2]),
                .o_valid (valid[round+2]),
                .i_ready (ready[round+2])
            );
        end
    endgenerate
endmodule

View File

@@ -0,0 +1,219 @@
// One ChaCha20 round implemented by time-sharing a single `chacha20_qr`
// quarter-round unit across the four quarter rounds of the round.
//
// IS_EVEN = 0 applies the quarter round to the columns
//   (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
// IS_EVEN = 1 applies it to the diagonals
//   (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
//
// Ports:
//   i_state / i_valid / o_ready - input state and handshake; the state is
//                                 latched while the input FSM is in STATE_IN
//   o_state / o_valid / i_ready - output state and handshake; o_valid is high
//                                 while the output FSM is in STATE_OUT
//
// KEY_SIZE, COUNTER_SIZE, NONCE_SIZE, ROUNDS and CONSTANT are not used in this
// module; they are kept so the parameter list is unchanged.
module chacha20_pipelined_round #(
    parameter IS_EVEN      = 1,
    parameter KEY_SIZE     = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE   = 64,
    parameter STATE_SIZE   = 512,
    parameter ROUNDS       = 20,
    parameter CONSTANT     = 128'h657870616e642033322d62797465206b
)(
    input  logic                  i_clk,
    input  logic                  i_rst,

    input  logic [STATE_SIZE-1:0] i_state,
    input  logic                  i_valid,
    output logic                  o_ready,

    output logic [STATE_SIZE-1:0] o_state,
    output logic                  o_valid,
    input  logic                  i_ready
);
    logic [31:0] state_in  [16];
    logic [31:0] state_out [16];

    logic [31:0] qr_a_i, qr_a_o;
    logic [31:0] qr_b_i, qr_b_o;
    logic [31:0] qr_c_i, qr_c_o;
    logic [31:0] qr_d_i, qr_d_o;

    logic qr_i_valid, qr_o_valid;
    logic qr_o_ready, qr_i_ready;

    // state is shared between the in fsm and the out fsm
    enum logic [1:0] {STATE_IN, STATE_SHIFT, STATE_OUT} in_fsm_state, out_fsm_state;

    // Which of the four quarter rounds is being fed / collected.
    logic [1:0] in_counter;
    logic [1:0] out_counter;

    // Select the four state words fed to the quarter-round unit.
    `define QR_IN(a, b, c, d) \
        qr_a_i = state_in[a]; \
        qr_b_i = state_in[b]; \
        qr_c_i = state_in[c]; \
        qr_d_i = state_in[d]

    // Store the quarter-round result back into the four matching state words.
    `define QR_OUT(a, b, c, d) \
        state_out[a] <= qr_a_o; \
        state_out[b] <= qr_b_o; \
        state_out[c] <= qr_c_o; \
        state_out[d] <= qr_d_o

    always_comb begin
        if (IS_EVEN) begin
            // Diagonal inputs; the index sets must match the QR_OUT cases of
            // the IS_EVEN branch in the output FSM.
            case (in_counter)
                0: begin
                    `QR_IN(0, 5, 10, 15);
                end
                1: begin
                    `QR_IN(1, 6, 11, 12);
                end
                2: begin
                    `QR_IN(2, 7, 8, 13);
                end
                3: begin
                    `QR_IN(3, 4, 9, 14);
                end
            endcase
        end else begin
            // Column inputs.
            case (in_counter)
                0: begin
                    `QR_IN(0, 4, 8, 12);
                end
                1: begin
                    `QR_IN(1, 5, 9, 13);
                end
                2: begin
                    `QR_IN(2, 6, 10, 14);
                end
                3: begin
                    `QR_IN(3, 7, 11, 15);
                end
            endcase
        end

        // Flatten the unpacked output words onto the packed output port.
        for (int i = 0; i < 16; i++) begin
            o_state[32*i +: 32] = state_out[i];
        end
    end

    chacha20_qr u_chacha20_qr (
        .i_clk   (i_clk),
        .i_rst   (i_rst),

        .i_valid (qr_i_valid),
        .o_ready (qr_o_ready),
        .a_i     (qr_a_i),
        .b_i     (qr_b_i),
        .c_i     (qr_c_i),
        .d_i     (qr_d_i),

        .o_valid (qr_o_valid),
        .i_ready (qr_i_ready),
        .a_o     (qr_a_o),
        .b_o     (qr_b_o),
        .c_o     (qr_c_o),
        .d_o     (qr_d_o)
    );

    assign o_ready    = in_fsm_state  == STATE_IN;
    assign qr_i_valid = in_fsm_state  == STATE_SHIFT;
    assign qr_i_ready = out_fsm_state == STATE_SHIFT;
    assign o_valid    = out_fsm_state == STATE_OUT;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            in_fsm_state  <= STATE_IN;
            out_fsm_state <= STATE_SHIFT;
            in_counter    <= '0;
            out_counter   <= '0;
        end else begin
            // Input FSM: latch a state, then feed four quarter rounds.
            case (in_fsm_state)
                STATE_IN: begin
                    if (i_valid) begin
                        // Register incoming state as unpacked 32 bit words.
                        // Non-blocking: state_in is read by the always_comb
                        // above, so it must update like any other register.
                        for (int i = 0; i < 16; i++) begin
                            state_in[i] <= i_state[32*i +: 32];
                        end
                        in_counter   <= '0;
                        in_fsm_state <= STATE_SHIFT;
                    end
                end

                STATE_SHIFT: begin
                    if (qr_o_ready) begin
                        in_counter <= in_counter + 1;
                        if (in_counter == 3) begin
                            in_fsm_state <= STATE_IN;
                        end
                    end
                end

                default: begin
                    in_fsm_state <= STATE_IN;
                end
            endcase

            // Output FSM: collect four quarter-round results, then present
            // the assembled state until the next stage takes it.
            case (out_fsm_state)
                STATE_SHIFT: begin
                    if (qr_o_valid) begin
                        if (IS_EVEN) begin
                            case (out_counter)
                                0: begin
                                    `QR_OUT(0, 5, 10, 15);
                                end
                                1: begin
                                    `QR_OUT(1, 6, 11, 12);
                                end
                                2: begin
                                    `QR_OUT(2, 7, 8, 13);
                                end
                                3: begin
                                    `QR_OUT(3, 4, 9, 14);
                                end
                            endcase
                        end else begin
                            case (out_counter)
                                0: begin
                                    `QR_OUT(0, 4, 8, 12);
                                end
                                1: begin
                                    `QR_OUT(1, 5, 9, 13);
                                end
                                2: begin
                                    `QR_OUT(2, 6, 10, 14);
                                end
                                3: begin
                                    `QR_OUT(3, 7, 11, 15);
                                end
                            endcase
                        end

                        // 2-bit counter wraps back to 0 after the fourth result.
                        out_counter <= out_counter + 1;
                        if (out_counter == 3) begin
                            out_fsm_state <= STATE_OUT;
                        end
                    end
                end

                STATE_OUT: begin
                    if (i_ready) begin
                        out_fsm_state <= STATE_SHIFT;
                    end
                end

                default: begin
                    out_fsm_state <= STATE_SHIFT;
                end
            endcase
        end
    end

    // Keep the helper macros local to this module.
    `undef QR_IN
    `undef QR_OUT
endmodule

View File

@@ -1,2 +1,4 @@
chacha20_qr.sv
chacha20_block.sv
chacha20_block.sv
chacha20_pipelined_round.sv
chacha20_pipelined_block.sv