Add correct amount of memory
The initial-state memory depth should be 160, not 20: there are 20 rounds, and each round is 8 pipeline stages (8 cycles), so 20 * 8 = 160 entries are needed.
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<efx:project name="chacha20_timing_test" description="" last_change="1751170370" sw_version="2025.1.110" last_run_state="pass" last_run_flow="bitstream" config_result_in_sync="true" design_ood="sync" place_ood="change" route_ood="change" xmlns:efx="http://www.efinixinc.com/enf_proj" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.efinixinc.com/enf_proj enf_proj.xsd">
|
<efx:project name="chacha20_timing_test" description="" last_change="1751223371" sw_version="2025.1.110" last_run_state="pass" last_run_flow="bitstream" config_result_in_sync="true" design_ood="sync" place_ood="sync" route_ood="sync" xmlns:efx="http://www.efinixinc.com/enf_proj" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.efinixinc.com/enf_proj enf_proj.xsd">
|
||||||
<efx:device_info>
|
<efx:device_info>
|
||||||
<efx:family name="Titanium"/>
|
<efx:family name="Titanium"/>
|
||||||
<efx:device name="Ti375N1156"/>
|
<efx:device name="Ti375N1156"/>
|
||||||
|
|||||||
@@ -6,12 +6,17 @@ from chacha_helpers import chacha_block
|
|||||||
|
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
from cocotb.clock import Clock
|
from cocotb.clock import Clock
|
||||||
from cocotb.triggers import Timer, RisingEdge, FallingEdge
|
from cocotb.triggers import Timer, RisingEdge, FallingEdge
|
||||||
|
from cocotb.queue import Queue
|
||||||
|
|
||||||
CLK_PERIOD = 4
|
CLK_PERIOD = 4
|
||||||
|
|
||||||
|
CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b]
|
||||||
|
|
||||||
|
|
||||||
class TB:
|
class TB:
|
||||||
def __init__(self, dut):
|
def __init__(self, dut):
|
||||||
@@ -20,9 +25,15 @@ class TB:
|
|||||||
self.log = logging.getLogger("cocotb.tb")
|
self.log = logging.getLogger("cocotb.tb")
|
||||||
self.log.setLevel(logging.INFO)
|
self.log.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
self.input_queue = Queue()
|
||||||
|
|
||||||
|
self.expected_queue = Queue()
|
||||||
|
self.output_queue = Queue()
|
||||||
|
|
||||||
cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start())
|
cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start())
|
||||||
|
|
||||||
|
cocotb.start_soon(self.run_input())
|
||||||
|
cocotb.start_soon(self.run_output())
|
||||||
|
|
||||||
async def cycle_reset(self):
|
async def cycle_reset(self):
|
||||||
await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)
|
await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)
|
||||||
@@ -38,7 +49,41 @@ class TB:
|
|||||||
await RisingEdge(clk)
|
await RisingEdge(clk)
|
||||||
await RisingEdge(clk)
|
await RisingEdge(clk)
|
||||||
|
|
||||||
|
async def write_input(self, key, counter, nonce):
|
||||||
|
await self.input_queue.put((key, counter, nonce))
|
||||||
|
|
||||||
|
|
||||||
|
data_in = CONSTANT[:]
|
||||||
|
data_in.extend(struct.unpack("8I", key.to_bytes(32, "little")))
|
||||||
|
data_in.extend(struct.unpack("2I", counter.to_bytes(8, "little")))
|
||||||
|
data_in.extend(struct.unpack("2I", nonce.to_bytes(8, "little")))
|
||||||
|
|
||||||
|
data_out = chacha_block(data_in)
|
||||||
|
|
||||||
|
await self.expected_queue.put(data_out)
|
||||||
|
|
||||||
|
async def run_input(self):
|
||||||
|
while True:
|
||||||
|
key, counter, nonce = await self.input_queue.get()
|
||||||
|
|
||||||
|
self.dut.i_key.value = key
|
||||||
|
self.dut.i_counter.value = counter
|
||||||
|
self.dut.i_nonce.value = nonce
|
||||||
|
self.dut.i_ready.value = 1
|
||||||
|
self.dut.i_valid.value = 1
|
||||||
|
await RisingEdge(self.dut.i_clk)
|
||||||
|
self.dut.i_valid.value = 0
|
||||||
|
|
||||||
|
async def run_output(self):
|
||||||
|
while True:
|
||||||
|
await RisingEdge(self.dut.i_clk)
|
||||||
|
if self.dut.o_valid.value:
|
||||||
|
state = self.dut.o_state.value.integer
|
||||||
|
|
||||||
|
state_bytes = int.to_bytes(state, 64, "little")
|
||||||
|
state_words = struct.unpack("16I", state_bytes)
|
||||||
|
|
||||||
|
await self.output_queue.put(state_words)
|
||||||
|
|
||||||
@cocotb.test
|
@cocotb.test
|
||||||
async def test_sanity(dut):
|
async def test_sanity(dut):
|
||||||
@@ -46,31 +91,27 @@ async def test_sanity(dut):
|
|||||||
|
|
||||||
await tb.cycle_reset()
|
await tb.cycle_reset()
|
||||||
|
|
||||||
data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b,
|
count = 1024
|
||||||
0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0,
|
|
||||||
0, 0, 0, 0]
|
|
||||||
|
|
||||||
data_out = chacha_block(data_in)
|
|
||||||
|
|
||||||
tb.dut.i_counter.value = 0
|
for i in range(count):
|
||||||
tb.dut.i_nonce.value = 0
|
key = random.randint(0, 2**256-1)
|
||||||
tb.dut.i_key.value = 0
|
counter = i
|
||||||
tb.dut.i_ready.value = 1
|
nonce = random.randint(0, 2**64-1)
|
||||||
tb.dut.i_valid.value = 1
|
|
||||||
await RisingEdge(tb.dut.i_clk)
|
await tb.write_input(key, counter, nonce)
|
||||||
tb.dut.i_valid.value = 0
|
|
||||||
|
|
||||||
await RisingEdge(tb.dut.o_valid)
|
|
||||||
state = tb.dut.o_state.value.integer
|
|
||||||
|
|
||||||
state_bytes = int.to_bytes(state, 64, "little")
|
|
||||||
state_words = struct.unpack("16I", state_bytes)
|
|
||||||
|
|
||||||
fail = False
|
fail = False
|
||||||
for i, (sim_val, dut_val) in enumerate(zip(data_out, state_words)):
|
for _ in range(count):
|
||||||
if sim_val != dut_val:
|
sim_vals = await tb.expected_queue.get()
|
||||||
tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}")
|
dut_vals = await tb.output_queue.get()
|
||||||
fail = True
|
|
||||||
|
for i, (sim_val, dut_val) in enumerate(zip(sim_vals, dut_vals)):
|
||||||
|
if sim_val != dut_val:
|
||||||
|
tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}")
|
||||||
|
fail = True
|
||||||
|
|
||||||
|
await Timer(1, "us")
|
||||||
|
|
||||||
assert not fail
|
assert not fail
|
||||||
@@ -21,6 +21,9 @@ module chacha20_block #(
|
|||||||
input logic i_ready
|
input logic i_ready
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// each round is 8 stages
|
||||||
|
localparam PIPE_STAGES = ROUNDS * 8;
|
||||||
|
|
||||||
`define QR(name, i, n, a, b, c, d) \
|
`define QR(name, i, n, a, b, c, d) \
|
||||||
chacha20_qr u_chacha20_``name ( \
|
chacha20_qr u_chacha20_``name ( \
|
||||||
.i_clk (i_clk), \
|
.i_clk (i_clk), \
|
||||||
@@ -49,9 +52,9 @@ logic [3:0] valid[ROUNDS+1];
|
|||||||
|
|
||||||
// small fifo for storing the initial state.
|
// small fifo for storing the initial state.
|
||||||
// better to store it in a memory than in flops
|
// better to store it in a memory than in flops
|
||||||
logic [$clog2(ROUNDS)-1:0] initial_state_wptr;
|
logic [$clog2(PIPE_STAGES)-1:0] initial_state_wptr;
|
||||||
logic [$clog2(ROUNDS)-1:0] initial_state_rptr;
|
logic [$clog2(PIPE_STAGES)-1:0] initial_state_rptr;
|
||||||
logic [511:0] initial_states [ROUNDS];
|
logic [511:0] initial_states [PIPE_STAGES];
|
||||||
|
|
||||||
logic [511:0] state_pre_add;
|
logic [511:0] state_pre_add;
|
||||||
logic pre_add_valid;
|
logic pre_add_valid;
|
||||||
@@ -67,12 +70,22 @@ always_ff @(posedge i_clk) begin
|
|||||||
end else begin
|
end else begin
|
||||||
if (i_valid) begin
|
if (i_valid) begin
|
||||||
initial_states[initial_state_wptr] <= write_initial_state;
|
initial_states[initial_state_wptr] <= write_initial_state;
|
||||||
|
if (initial_state_wptr < PIPE_STAGES-1) begin
|
||||||
|
initial_state_wptr <= initial_state_wptr + 1;
|
||||||
|
end else begin
|
||||||
|
initial_state_wptr <= '0;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
pre_add_valid <= valid[ROUNDS][0];
|
pre_add_valid <= valid[ROUNDS][0];
|
||||||
|
|
||||||
if (valid[ROUNDS][0]) begin
|
if (valid[ROUNDS][0]) begin
|
||||||
read_initial_state <= initial_states[initial_state_rptr];
|
read_initial_state <= initial_states[initial_state_rptr];
|
||||||
|
if (initial_state_rptr < PIPE_STAGES-1) begin
|
||||||
|
initial_state_rptr <= initial_state_rptr + 1;
|
||||||
|
end else begin
|
||||||
|
initial_state_rptr <= '0;
|
||||||
|
end
|
||||||
for (int i = 0; i < 16; i++) begin
|
for (int i = 0; i < 16; i++) begin
|
||||||
state_pre_add[i*32 +: 32] <= state[ROUNDS][i];
|
state_pre_add[i*32 +: 32] <= state[ROUNDS][i];
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user