Get sim working, make some changes to the final addition

This commit is contained in:
Byron Lathi
2025-06-28 20:34:46 -07:00
parent 8136a7526b
commit 20d98e117b
8 changed files with 122 additions and 10 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.venv .venv
sim_build sim_build
__pycache__
*.bkp *.bkp

View File

View File

@@ -4,3 +4,4 @@ tests:
modules: modules:
- "chacha20_block" - "chacha20_block"
sources: "sources.list" sources: "sources.list"
waves: True

View File

@@ -1,5 +1,11 @@
import cocotb import cocotb
import logging
from chacha_helpers import chacha_block
import struct
from cocotb.clock import Clock from cocotb.clock import Clock
from cocotb.triggers import Timer, RisingEdge, FallingEdge from cocotb.triggers import Timer, RisingEdge, FallingEdge
@@ -11,11 +17,60 @@ class TB:
def __init__(self, dut):
    """Bind the testbench to ``dut``, set up its logger and start the clock.

    Args:
        dut: Simulator handle of the design under test; its ``i_clk``
            signal is driven by the clock started here.
    """
    self.dut = dut

    # Dedicated testbench logger, pinned at INFO verbosity.
    logger = logging.getLogger("cocotb.tb")
    logger.setLevel(logging.INFO)
    self.log = logger

    # Clock on i_clk with a period of CLK_PERIOD ns, started as a
    # background task so it keeps toggling while the test runs.
    clock = Clock(self.dut.i_clk, CLK_PERIOD, units="ns")
    cocotb.start_soon(clock.start())
async def cycle_reset(self):
    """Pulse the DUT's ``i_rst`` input, timed against ``i_clk``."""
    dut = self.dut
    await self._cycle_reset(dut.i_rst, dut.i_clk)
async def _cycle_reset(self, rst, clk):
    """Drive a reset pulse on ``rst``, timed by rising edges of ``clk``.

    Sequence: force ``rst`` low immediately, wait two rising edges, set it
    high, wait two rising edges, set it low again, then wait two more
    rising edges before returning.

    Args:
        rst: Handle of the reset signal to pulse.
        clk: Handle of the clock whose rising edges pace the sequence.
    """
    rst.setimmediatevalue(0)

    # Each phase waits two clock edges and then applies the next level.
    for level in (1, 0):
        for _ in range(2):
            await RisingEdge(clk)
        rst.value = level

    # Two further edges after reset has been released.
    for _ in range(2):
        await RisingEdge(clk)
@cocotb.test
async def test_sanity(dut):
    """Push one all-zero block through the DUT and compare with the model.

    The DUT is reset, a single request with zero counter, nonce and key is
    presented for one clock cycle, and the 512-bit ``o_state`` sampled on
    the rising edge of ``o_valid`` is compared word by word against
    ``chacha_block`` run on the equivalent 16-word input.
    """
    tb = TB(dut)
    await tb.cycle_reset()

    # Reference input: four constant words followed by twelve zero words,
    # mirroring the zero counter/nonce/key driven into the DUT below.
    data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b,
               0, 0, 0, 0,
               0, 0, 0, 0,
               0, 0, 0, 0]
    data_out = chacha_block(data_in)

    tb.dut.i_counter.value = 0
    tb.dut.i_nonce.value = 0
    tb.dut.i_key.value = 0
    tb.dut.i_ready.value = 1

    # Present the request for exactly one clock cycle.
    tb.dut.i_valid.value = 1
    await RisingEdge(tb.dut.i_clk)
    tb.dut.i_valid.value = 0

    await RisingEdge(tb.dut.o_valid)

    # Split the 512-bit result into sixteen 32-bit words, word 0 taken from
    # the least-significant bits. The explicit "<" keeps the unpacking
    # little-endian on every host, matching the byte order requested from
    # to_bytes(); a bare "16I" would follow the host's native byte order.
    state = tb.dut.o_state.value.integer
    state_bytes = state.to_bytes(64, "little")
    state_words = struct.unpack("<16I", state_bytes)

    # Collect every differing word so one run reports all of them.
    mismatches = [
        (i, sim_val, dut_val)
        for i, (sim_val, dut_val) in enumerate(zip(data_out, state_words))
        if sim_val != dut_val
    ]
    for i, sim_val, dut_val in mismatches:
        tb.log.error(f"{i}: {sim_val:x} -> {dut_val:x}")

    assert not mismatches, f"{len(mismatches)} of 16 state words differ from the model"

View File

@@ -0,0 +1,42 @@
from typing import Sequence
def ROTL(a, b):
    """Rotate the 32-bit word ``a`` left by ``b`` bit positions.

    Python integers are unbounded, so the shifted value is truncated
    explicitly; the result is always in ``0 .. 0xffffffff``.

    Args:
        a: Word to rotate; only its low 32 bits are used.
        b: Rotation amount in bits; taken modulo 32.

    Returns:
        The rotated 32-bit word.
    """
    a &= 0xffffffff
    b %= 32
    # Without the final mask the bits shifted out past bit 31 would remain
    # in the result in addition to being wrapped around.
    return ((a << b) | (a >> (32 - b))) & 0xffffffff
def QR(a, b, c, d):
    """Apply one ChaCha quarter round to four 32-bit words.

    The add / xor / rotate step is performed twice: first with rotation
    amounts 16 (for ``d``) and 12 (for ``b``), then with 8 and 7. Every
    intermediate value is reduced to 32 bits.

    Args:
        a, b, c, d: The four input words.

    Returns:
        Tuple ``(a, b, c, d)`` holding the updated words.
    """
    mask = 0xffffffff
    for rot_d, rot_b in ((16, 12), (8, 7)):
        a = (a + b) & mask
        d = ROTL((d ^ a) & mask, rot_d) & mask
        c = (c + d) & mask
        b = ROTL((b ^ c) & mask, rot_b) & mask
    return a, b, c, d
# Total number of rounds; chacha_block steps through them two at a time,
# each step applying eight quarter rounds.
ROUNDS = 20
def chacha_block(data_in: Sequence[int]) -> Sequence[int]:
    """Run the ChaCha block function on a 16-word state.

    The state is mixed with ``ROUNDS`` rounds, processed as pairs of one
    column round and one diagonal round, after which the original input is
    added back word by word modulo 2**32.

    Args:
        data_in: The sixteen 32-bit input words. Never modified.

    Returns:
        A new list with the sixteen 32-bit output words.
    """
    # Build a real list instead of slicing: a slice of a tuple is immutable,
    # and a slice of an array-like may be a view onto the caller's data.
    data = list(data_in)

    quarter_rounds = (
        # Column round.
        (0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15),
        # Diagonal round.
        (0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14),
    )
    for _ in range(0, ROUNDS, 2):
        for w, x, y, z in quarter_rounds:
            data[w], data[x], data[y], data[z] = QR(data[w], data[x], data[y], data[z])

    # Feed-forward: add the untouched input to the mixed state, one 32-bit
    # word at a time so no carry crosses a word boundary.
    for i in range(16):
        data[i] = (data[i] + data_in[i]) & 0xffffffff
    return data

View File

@@ -3,6 +3,7 @@ module chacha20_block #(
parameter COUNTER_SIZE = 64, parameter COUNTER_SIZE = 64,
parameter NONCE_SIZE = 64, parameter NONCE_SIZE = 64,
parameter STATE_SIZE = 512, parameter STATE_SIZE = 512,
parameter ROUNDS = 20,
parameter CONSTANT = 128'h657870616e642033322d62797465206b parameter CONSTANT = 128'h657870616e642033322d62797465206b
)( )(
input logic i_clk, input logic i_clk,
@@ -41,16 +42,19 @@ chacha20_qr u_chacha20_``name ( \
) )
logic [31:0] state [21][16]; logic [31:0] state [ROUNDS+1][16];
logic [3:0] valid[21]; logic [3:0] valid[ROUNDS+1];
// logic [3:0] ready[21]; // logic [3:0] ready[21];
// small fifo for storing the initial state. // small fifo for storing the initial state.
// better to store it in a memory than in flops // better to store it in a memory than in flops
logic [4:0] initial_state_wptr; logic [$clog2(ROUNDS)-1:0] initial_state_wptr;
logic [4:0] initial_state_rptr; logic [$clog2(ROUNDS)-1:0] initial_state_rptr;
logic [511:0] initial_states [20]; logic [511:0] initial_states [ROUNDS];
logic [511:0] state_pre_add;
logic pre_add_valid;
logic [511:0] write_initial_state, read_initial_state; logic [511:0] write_initial_state, read_initial_state;
@@ -65,14 +69,23 @@ always_ff @(posedge i_clk) begin
initial_states[initial_state_wptr] <= write_initial_state; initial_states[initial_state_wptr] <= write_initial_state;
end end
if (valid[19][0]) begin pre_add_valid <= valid[ROUNDS][0];
if (valid[ROUNDS][0]) begin
read_initial_state <= initial_states[initial_state_rptr]; read_initial_state <= initial_states[initial_state_rptr];
for (int i = 0; i < 16; i++) begin
state_pre_add[i*32 +: 32] <= state[ROUNDS][i];
end
end end
o_valid <= &valid[20]; o_valid <= pre_add_valid;
// We cannot just add state_pre_add and read_initial state
// because the addition needs to be done wordwise, with no
// carries between 32 bit groups.
for (int i = 0; i < 16; i++) begin for (int i = 0; i < 16; i++) begin
o_state[i*32 +: 32] <= state[20][i] + read_initial_state[i*32 +: 32]; o_state[i*32 +: 32] <= state_pre_add[i*32 +: 32] + read_initial_state[i*32 +: 32];
end end
end end
@@ -111,7 +124,7 @@ end
generate generate
for (genvar round = 0; round < 20; round+=2) begin : ROUND_LOOP for (genvar round = 0; round < ROUNDS; round+=2) begin : ROUND_LOOP
`QR(0, round, 0, 0, 4, 8, 12); `QR(0, round, 0, 0, 4, 8, 12);
`QR(1, round, 1, 1, 5, 9, 13); `QR(1, round, 1, 1, 5, 9, 13);
`QR(2, round, 2, 2, 6, 10, 14); `QR(2, round, 2, 2, 6, 10, 14);