Add correct amount of memory

The initial-state memory depth should be 160, not 20: there are 20 rounds and each round takes 8 pipeline stages (cycles), so 20 * 8 = 160 entries are needed.
This commit is contained in:
Byron Lathi
2025-06-29 13:29:38 -07:00
parent 4c7badbbbb
commit 196ea8e6d3
3 changed files with 79 additions and 25 deletions

View File

@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<efx:project name="chacha20_timing_test" description="" last_change="1751170370" sw_version="2025.1.110" last_run_state="pass" last_run_flow="bitstream" config_result_in_sync="true" design_ood="sync" place_ood="change" route_ood="change" xmlns:efx="http://www.efinixinc.com/enf_proj" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.efinixinc.com/enf_proj enf_proj.xsd">
<efx:project name="chacha20_timing_test" description="" last_change="1751223371" sw_version="2025.1.110" last_run_state="pass" last_run_flow="bitstream" config_result_in_sync="true" design_ood="sync" place_ood="sync" route_ood="sync" xmlns:efx="http://www.efinixinc.com/enf_proj" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.efinixinc.com/enf_proj enf_proj.xsd">
<efx:device_info>
<efx:family name="Titanium"/>
<efx:device name="Ti375N1156"/>

View File

@@ -6,12 +6,17 @@ from chacha_helpers import chacha_block
import struct
import random
from cocotb.clock import Clock
from cocotb.triggers import Timer, RisingEdge, FallingEdge
from cocotb.queue import Queue
CLK_PERIOD = 4
CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b]
class TB:
def __init__(self, dut):
@@ -20,9 +25,15 @@ class TB:
self.log = logging.getLogger("cocotb.tb")
self.log.setLevel(logging.INFO)
self.input_queue = Queue()
self.expected_queue = Queue()
self.output_queue = Queue()
cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start())
cocotb.start_soon(self.run_input())
cocotb.start_soon(self.run_output())
async def cycle_reset(self):
await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)
@@ -38,7 +49,41 @@ class TB:
await RisingEdge(clk)
await RisingEdge(clk)
async def write_input(self, key, counter, nonce):
    """Queue one request for the DUT together with its expected result.

    The raw ``(key, counter, nonce)`` tuple is put on ``input_queue``.
    The same values are then split into 32-bit words, appended to
    ``CONSTANT``, and passed to ``chacha_block``; whatever that returns
    is put on ``expected_queue``.

    Args:
        key: Non-negative integer that fits in 32 bytes (eight words).
        counter: Non-negative integer that fits in 8 bytes (two words).
        nonce: Non-negative integer that fits in 8 bytes (two words).

    Raises:
        OverflowError: If a value is negative or too large for its
            byte width (raised by ``int.to_bytes``).
    """
    await self.input_queue.put((key, counter, nonce))

    # The bytes are produced explicitly as little-endian, so the unpack
    # format must say "<" as well.  A bare "8I" means *native* byte order
    # and would yield byte-swapped words on a big-endian host.
    data_in = CONSTANT[:]
    data_in.extend(struct.unpack("<8I", key.to_bytes(32, "little")))
    data_in.extend(struct.unpack("<2I", counter.to_bytes(8, "little")))
    data_in.extend(struct.unpack("<2I", nonce.to_bytes(8, "little")))

    data_out = chacha_block(data_in)
    await self.expected_queue.put(data_out)
async def run_input(self):
    """Drive queued requests onto the DUT inputs, one per clock edge.

    Runs forever: takes the next ``(key, counter, nonce)`` tuple from
    ``input_queue``, applies it to ``i_key`` / ``i_counter`` / ``i_nonce``,
    sets ``i_ready`` and ``i_valid`` to 1, waits for one rising edge of
    ``i_clk`` and then sets ``i_valid`` back to 0.
    """
    dut = self.dut
    while True:
        request = await self.input_queue.get()
        # Targets are assigned left to right: key, counter, nonce.
        dut.i_key.value, dut.i_counter.value, dut.i_nonce.value = request
        dut.i_ready.value = 1
        dut.i_valid.value = 1
        await RisingEdge(dut.i_clk)
        # Written back to 0 after the edge; if another request is already
        # queued, the next iteration sets it to 1 again before the
        # following edge.
        dut.i_valid.value = 0
async def run_output(self):
    """Capture DUT results into ``output_queue``.

    Runs forever: on every rising edge of ``i_clk`` where ``o_valid`` is
    truthy, reads ``o_state`` as an integer, splits it into sixteen
    32-bit words (least-significant word first) and queues the tuple.
    """
    while True:
        await RisingEdge(self.dut.i_clk)
        if not self.dut.o_valid.value:
            continue

        state = self.dut.o_state.value.integer
        # The integer is serialised as little-endian bytes, so the unpack
        # format must be explicitly little-endian too.  A bare "16I" means
        # native byte order and would byte-swap each word on a big-endian
        # host.
        state_words = struct.unpack("<16I", state.to_bytes(64, "little"))
        await self.output_queue.put(state_words)
@cocotb.test
async def test_sanity(dut):
@@ -46,31 +91,27 @@ async def test_sanity(dut):
await tb.cycle_reset()
data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0]
data_out = chacha_block(data_in)
count = 1024
tb.dut.i_counter.value = 0
tb.dut.i_nonce.value = 0
tb.dut.i_key.value = 0
tb.dut.i_ready.value = 1
tb.dut.i_valid.value = 1
await RisingEdge(tb.dut.i_clk)
tb.dut.i_valid.value = 0
for i in range(count):
key = random.randint(0, 2**256-1)
counter = i
nonce = random.randint(0, 2**64-1)
await tb.write_input(key, counter, nonce)
await RisingEdge(tb.dut.o_valid)
state = tb.dut.o_state.value.integer
state_bytes = int.to_bytes(state, 64, "little")
state_words = struct.unpack("16I", state_bytes)
fail = False
for i, (sim_val, dut_val) in enumerate(zip(data_out, state_words)):
if sim_val != dut_val:
tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}")
fail = True
for _ in range(count):
sim_vals = await tb.expected_queue.get()
dut_vals = await tb.output_queue.get()
for i, (sim_val, dut_val) in enumerate(zip(sim_vals, dut_vals)):
if sim_val != dut_val:
tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}")
fail = True
await Timer(1, "us")
assert not fail

View File

@@ -21,6 +21,9 @@ module chacha20_block #(
input logic i_ready
);
// each round is 8 stages
localparam PIPE_STAGES = ROUNDS * 8;
`define QR(name, i, n, a, b, c, d) \
chacha20_qr u_chacha20_``name ( \
.i_clk (i_clk), \
@@ -49,9 +52,9 @@ logic [3:0] valid[ROUNDS+1];
// small fifo for storing the initial state.
// better to store it in a memory than in flops
logic [$clog2(ROUNDS)-1:0] initial_state_wptr;
logic [$clog2(ROUNDS)-1:0] initial_state_rptr;
logic [511:0] initial_states [ROUNDS];
logic [$clog2(PIPE_STAGES)-1:0] initial_state_wptr;
logic [$clog2(PIPE_STAGES)-1:0] initial_state_rptr;
logic [511:0] initial_states [PIPE_STAGES];
logic [511:0] state_pre_add;
logic pre_add_valid;
@@ -67,12 +70,22 @@ always_ff @(posedge i_clk) begin
end else begin
if (i_valid) begin
initial_states[initial_state_wptr] <= write_initial_state;
if (initial_state_wptr < PIPE_STAGES-1) begin
initial_state_wptr <= initial_state_wptr + 1;
end else begin
initial_state_wptr <= '0;
end
end
pre_add_valid <= valid[ROUNDS][0];
if (valid[ROUNDS][0]) begin
read_initial_state <= initial_states[initial_state_rptr];
if (initial_state_rptr < PIPE_STAGES-1) begin
initial_state_rptr <= initial_state_rptr + 1;
end else begin
initial_state_rptr <= '0;
end
for (int i = 0; i < 16; i++) begin
state_pre_add[i*32 +: 32] <= state[ROUNDS][i];
end