diff --git a/.gitignore b/.gitignore index e1d6797..257d5c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .venv sim_build +__pycache__ *.bkp diff --git a/ChaCha20_Poly1305_64/sim/__init__.py b/ChaCha20_Poly1305_64/sim/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-311.pyc b/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-311.pyc deleted file mode 100644 index e541083..0000000 Binary files a/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-311.pyc and /dev/null differ diff --git a/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-313.pyc b/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-313.pyc deleted file mode 100644 index 74b848c..0000000 Binary files a/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-313.pyc and /dev/null differ diff --git a/ChaCha20_Poly1305_64/sim/chacha20.yaml b/ChaCha20_Poly1305_64/sim/chacha20.yaml index 5182166..86a2401 100644 --- a/ChaCha20_Poly1305_64/sim/chacha20.yaml +++ b/ChaCha20_Poly1305_64/sim/chacha20.yaml @@ -4,3 +4,4 @@ tests: modules: - "chacha20_block" sources: "sources.list" + waves: True \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/sim/chacha20_block.py b/ChaCha20_Poly1305_64/sim/chacha20_block.py index 2ccb74e..fbd5762 100644 --- a/ChaCha20_Poly1305_64/sim/chacha20_block.py +++ b/ChaCha20_Poly1305_64/sim/chacha20_block.py @@ -1,5 +1,11 @@ import cocotb +import logging + +from chacha_helpers import chacha_block + +import struct + from cocotb.clock import Clock from cocotb.triggers import Timer, RisingEdge, FallingEdge @@ -11,11 +17,60 @@ class TB: def __init__(self, dut): self.dut = dut + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.INFO) + + cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start()) + + async def cycle_reset(self): + await self._cycle_reset(self.dut.i_rst, self.dut.i_clk) + + async def _cycle_reset(self, rst, clk): + rst.setimmediatevalue(0) + await RisingEdge(clk) + await RisingEdge(clk) + rst.value = 1 + await RisingEdge(clk) + await RisingEdge(clk) + rst.value = 0 + await RisingEdge(clk) + await RisingEdge(clk) + + + @cocotb.test async def test_sanity(dut): tb = TB(dut) + await tb.cycle_reset() + + data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0] + + data_out = chacha_block(data_in) + + tb.dut.i_counter.value = 0 + tb.dut.i_nonce.value = 0 + tb.dut.i_key.value = 0 + tb.dut.i_ready.value = 1 + tb.dut.i_valid.value = 1 await RisingEdge(tb.dut.i_clk) - await RisingEdge(tb.dut.i_clk) + tb.dut.i_valid.value = 0 + + await RisingEdge(tb.dut.o_valid) + state = tb.dut.o_state.value.integer + + state_bytes = int.to_bytes(state, 64, "little") + state_words = struct.unpack("16I", state_bytes) + + fail = False + for i, (sim_val, dut_val) in enumerate(zip(data_out, state_words)): + if sim_val != dut_val: + tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}") + fail = True + + assert not fail \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/sim/chacha_helpers.py b/ChaCha20_Poly1305_64/sim/chacha_helpers.py new file mode 100644 index 0000000..3f276ef --- /dev/null +++ b/ChaCha20_Poly1305_64/sim/chacha_helpers.py @@ -0,0 +1,42 @@ +from typing import Sequence + +def ROTL(a,b): + return (((a) << (b)) | ((a) >> (32 - (b)))) + +def QR(a, b, c, d): + a = (a + b) & 0xffffffff + d = (d ^ a) & 0xffffffff + d = ROTL(d, 16) & 0xffffffff + c = (c + d) & 0xffffffff + b = (b ^ c) & 0xffffffff + b = ROTL(b, 12) + a = (a + b) & 0xffffffff + d = (d ^ a) & 0xffffffff + d = ROTL(d, 8) & 0xffffffff + c = (c + d) & 0xffffffff + b = (b ^ c) & 0xffffffff + b = ROTL(b, 7) & 0xffffffff + + return a, b, c, d + +ROUNDS = 20 +def chacha_block(data_in: Sequence[int]) -> Sequence[int]: + # make sure to copy this list so it doesn't get modified + data = data_in[:] + + + for i in range(0, ROUNDS, 2): + data[0], data[4], data[8], data[12] = QR(data[0], data[4], data[8], data[12]) + data[1], data[5], data[9], data[13] = QR(data[1], data[5], data[9], data[13]) + data[2], data[6], data[10], data[14] = QR(data[2], data[6], data[10], data[14]) + data[3], data[7], data[11], data[15] = QR(data[3], data[7], data[11], data[15]) + + data[0], data[5], data[10], data[15] = QR(data[0], data[5], data[10], data[15]) + data[1], data[6], data[11], data[12] = QR(data[1], data[6], data[11], data[12]) + data[2], data[7], data[8], data[13] = QR(data[2], data[7], data[8], data[13]) + data[3], data[4], data[9], data[14] = QR(data[3], data[4], data[9], data[14]) + + for i in range(16): + data[i] = (data[i] + data_in[i]) & 0xffffffff + + return data \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/src/chacha20_block.sv b/ChaCha20_Poly1305_64/src/chacha20_block.sv index 6fbc681..8810bda 100644 --- a/ChaCha20_Poly1305_64/src/chacha20_block.sv +++ b/ChaCha20_Poly1305_64/src/chacha20_block.sv @@ -3,6 +3,7 @@ module chacha20_block #( parameter COUNTER_SIZE = 64, parameter NONCE_SIZE = 64, parameter STATE_SIZE = 512, + parameter ROUNDS = 20, parameter CONSTANT = 128'h657870616e642033322d62797465206b )( input logic i_clk, @@ -41,16 +42,19 @@ chacha20_qr u_chacha20_``name ( \ ) -logic [31:0] state [21][16]; -logic [3:0] valid[21]; +logic [31:0] state [ROUNDS+1][16]; +logic [3:0] valid[ROUNDS+1]; // logic [3:0] ready[21]; // small fifo for storing the initial state. // better to store it in a memory than in flops -logic [4:0] initial_state_wptr; -logic [4:0] initial_state_rptr; -logic [511:0] initial_states [20]; +logic [$clog2(ROUNDS)-1:0] initial_state_wptr; +logic [$clog2(ROUNDS)-1:0] initial_state_rptr; +logic [511:0] initial_states [ROUNDS]; + +logic [511:0] state_pre_add; +logic pre_add_valid; logic [511:0] write_initial_state, read_initial_state; @@ -65,14 +69,23 @@ always_ff @(posedge i_clk) begin initial_states[initial_state_wptr] <= write_initial_state; end - if (valid[19][0]) begin + pre_add_valid <= valid[ROUNDS][0]; + + if (valid[ROUNDS][0]) begin read_initial_state <= initial_states[initial_state_rptr]; + for (int i = 0; i < 16; i++) begin + state_pre_add[i*32 +: 32] <= state[ROUNDS][i]; + end end - o_valid <= &valid[20]; + o_valid <= pre_add_valid; + + // We cannot just add state_pre_add and read_initial state + // because the addition needs to be done wordwise, with no + // carries between 32 bit groups. for (int i = 0; i < 16; i++) begin - o_state[i*32 +: 32] <= state[20][i] + read_initial_state[i*32 +: 32]; + o_state[i*32 +: 32] <= state_pre_add[i*32 +: 32] + read_initial_state[i*32 +: 32]; end end @@ -111,7 +124,7 @@ end generate - for (genvar round = 0; round < 20; round+=2) begin : ROUND_LOOP + for (genvar round = 0; round < ROUNDS; round+=2) begin : ROUND_LOOP `QR(0, round, 0, 0, 4, 8, 12); `QR(1, round, 1, 1, 5, 9, 13); `QR(2, round, 2, 2, 6, 10, 14);