From 196ea8e6d33f4f845fc7c25d72960229c79429b8 Mon Sep 17 00:00:00 2001 From: Byron Lathi Date: Sun, 29 Jun 2025 13:29:38 -0700 Subject: [PATCH] Add correct amount of memory: the initial-state FIFO depth should be 160, not 20. There are 8 pipeline stages per round and 20 rounds --- .../chacha20_timing_test.xml | 2 +- ChaCha20_Poly1305_64/sim/chacha20_block.py | 83 ++++++++++++++----- ChaCha20_Poly1305_64/src/chacha20_block.sv | 19 ++++- 3 files changed, 79 insertions(+), 25 deletions(-) diff --git a/ChaCha20_Poly1305_64/chacha20_timing_test/chacha20_timing_test.xml b/ChaCha20_Poly1305_64/chacha20_timing_test/chacha20_timing_test.xml index 130e781..30e411e 100644 --- a/ChaCha20_Poly1305_64/chacha20_timing_test/chacha20_timing_test.xml +++ b/ChaCha20_Poly1305_64/chacha20_timing_test/chacha20_timing_test.xml @@ -1,5 +1,5 @@ - + diff --git a/ChaCha20_Poly1305_64/sim/chacha20_block.py b/ChaCha20_Poly1305_64/sim/chacha20_block.py index fbd5762..88f3c09 100644 --- a/ChaCha20_Poly1305_64/sim/chacha20_block.py +++ b/ChaCha20_Poly1305_64/sim/chacha20_block.py @@ -6,12 +6,17 @@ from chacha_helpers import chacha_block import struct +import random + from cocotb.clock import Clock from cocotb.triggers import Timer, RisingEdge, FallingEdge +from cocotb.queue import Queue CLK_PERIOD = 4 +CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b] + class TB: def __init__(self, dut): @@ -20,9 +25,15 @@ class TB: self.log = logging.getLogger("cocotb.tb") self.log.setLevel(logging.INFO) + self.input_queue = Queue() + + self.expected_queue = Queue() + self.output_queue = Queue() cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start()) + cocotb.start_soon(self.run_input()) + cocotb.start_soon(self.run_output()) async def cycle_reset(self): await self._cycle_reset(self.dut.i_rst, self.dut.i_clk) @@ -38,7 +49,41 @@ class TB: await RisingEdge(clk) await RisingEdge(clk) + async def write_input(self, key, counter, nonce): + await self.input_queue.put((key, counter, nonce)) + + data_in = CONSTANT[:] + 
data_in.extend(struct.unpack("8I", key.to_bytes(32, "little"))) + data_in.extend(struct.unpack("2I", counter.to_bytes(8, "little"))) + data_in.extend(struct.unpack("2I", nonce.to_bytes(8, "little"))) + + data_out = chacha_block(data_in) + + await self.expected_queue.put(data_out) + + async def run_input(self): + while True: + key, counter, nonce = await self.input_queue.get() + + self.dut.i_key.value = key + self.dut.i_counter.value = counter + self.dut.i_nonce.value = nonce + self.dut.i_ready.value = 1 + self.dut.i_valid.value = 1 + await RisingEdge(self.dut.i_clk) + self.dut.i_valid.value = 0 + + async def run_output(self): + while True: + await RisingEdge(self.dut.i_clk) + if self.dut.o_valid.value: + state = self.dut.o_state.value.integer + + state_bytes = int.to_bytes(state, 64, "little") + state_words = struct.unpack("16I", state_bytes) + + await self.output_queue.put(state_words) @cocotb.test async def test_sanity(dut): @@ -46,31 +91,27 @@ async def test_sanity(dut): await tb.cycle_reset() - data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0] - - data_out = chacha_block(data_in) + count = 1024 - tb.dut.i_counter.value = 0 - tb.dut.i_nonce.value = 0 - tb.dut.i_key.value = 0 - tb.dut.i_ready.value = 1 - tb.dut.i_valid.value = 1 - await RisingEdge(tb.dut.i_clk) - tb.dut.i_valid.value = 0 + for i in range(count): + key = random.randint(0, 2**256-1) + counter = i + nonce = random.randint(0, 2**64-1) + + await tb.write_input(key, counter, nonce) - await RisingEdge(tb.dut.o_valid) - state = tb.dut.o_state.value.integer - state_bytes = int.to_bytes(state, 64, "little") - state_words = struct.unpack("16I", state_bytes) fail = False - for i, (sim_val, dut_val) in enumerate(zip(data_out, state_words)): - if sim_val != dut_val: - tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}") - fail = True + for _ in range(count): + sim_vals = await tb.expected_queue.get() + dut_vals = await tb.output_queue.get() + + for i, (sim_val, 
dut_val) in enumerate(zip(sim_vals, dut_vals)): + if sim_val != dut_val: + tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}") + fail = True + + await Timer(1, "us") assert not fail \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/src/chacha20_block.sv b/ChaCha20_Poly1305_64/src/chacha20_block.sv index 8810bda..2230973 100644 --- a/ChaCha20_Poly1305_64/src/chacha20_block.sv +++ b/ChaCha20_Poly1305_64/src/chacha20_block.sv @@ -21,6 +21,9 @@ module chacha20_block #( input logic i_ready ); +// each round is 8 stages +localparam PIPE_STAGES = ROUNDS * 8; + `define QR(name, i, n, a, b, c, d) \ chacha20_qr u_chacha20_``name ( \ .i_clk (i_clk), \ @@ -49,9 +52,9 @@ logic [3:0] valid[ROUNDS+1]; // small fifo for storing the initial state. // better to store it in a memory than in flops -logic [$clog2(ROUNDS)-1:0] initial_state_wptr; -logic [$clog2(ROUNDS)-1:0] initial_state_rptr; -logic [511:0] initial_states [ROUNDS]; +logic [$clog2(PIPE_STAGES)-1:0] initial_state_wptr; +logic [$clog2(PIPE_STAGES)-1:0] initial_state_rptr; +logic [511:0] initial_states [PIPE_STAGES]; logic [511:0] state_pre_add; logic pre_add_valid; @@ -67,12 +70,22 @@ always_ff @(posedge i_clk) begin end else begin if (i_valid) begin initial_states[initial_state_wptr] <= write_initial_state; + if (initial_state_wptr < PIPE_STAGES-1) begin + initial_state_wptr <= initial_state_wptr + 1; + end else begin + initial_state_wptr <= '0; + end end pre_add_valid <= valid[ROUNDS][0]; if (valid[ROUNDS][0]) begin read_initial_state <= initial_states[initial_state_rptr]; + if (initial_state_rptr < PIPE_STAGES-1) begin + initial_state_rptr <= initial_state_rptr + 1; + end else begin + initial_state_rptr <= '0; + end for (int i = 0; i < 16; i++) begin state_pre_add[i*32 +: 32] <= state[ROUNDS][i]; end