From 20d98e117b8757982b7ef5e7ca47546de0a1b72f Mon Sep 17 00:00:00 2001 From: Byron Lathi Date: Sat, 28 Jun 2025 20:34:46 -0700 Subject: [PATCH] Get sim working, make some changes to the final addition --- .gitignore | 1 + ChaCha20_Poly1305_64/sim/__init__.py | 0 .../chacha20_block.cpython-311.pyc | Bin 1200 -> 0 bytes .../chacha20_block.cpython-313.pyc | Bin 1421 -> 0 bytes ChaCha20_Poly1305_64/sim/chacha20.yaml | 1 + ChaCha20_Poly1305_64/sim/chacha20_block.py | 57 +++++++++++++++++- ChaCha20_Poly1305_64/sim/chacha_helpers.py | 42 +++++++++++++ ChaCha20_Poly1305_64/src/chacha20_block.sv | 31 +++++++--- 8 files changed, 122 insertions(+), 10 deletions(-) create mode 100644 ChaCha20_Poly1305_64/sim/__init__.py delete mode 100644 ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-311.pyc delete mode 100644 ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-313.pyc create mode 100644 ChaCha20_Poly1305_64/sim/chacha_helpers.py diff --git a/.gitignore b/.gitignore index e1d6797..257d5c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .venv sim_build +__pycache__ *.bkp diff --git a/ChaCha20_Poly1305_64/sim/__init__.py b/ChaCha20_Poly1305_64/sim/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-311.pyc b/ChaCha20_Poly1305_64/sim/__pycache__/chacha20_block.cpython-311.pyc deleted file mode 100644 index e54108383db2d6f915cb910074e887758c7215d4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1200 zcmbtS&1=*^6n``M=r-H6hZgssXlYLYY5YEjcxbU61SxIlA&A3n5_hvrHg%F7R##Z) zB2@}{DC|EVmC}F2V+9W-;7JO4OIc4n`6k(ol`06ndGkJgZ(cI<-uqB0X@K{5Vz+!>i8 z4m1iw&sMvzi_ZQK0CUtP%b<^Z7sw->#LB!ESZlfzD=V(=@K{@NL$_JGZ`Yi-xKQ!D zfn(*LKm#9G?qihd&-L#4MpPA)X9QT4}8a{ws_Dq7J1NctSB@r-dc|WW4>NNzJ8T0243sh zjjOlV?VCpE`i51nAY-5^o;kbTiVKXnO*dlfd2wZKcIb?XKb25);Beynmb_iq!qrCr zt0ar0X2o|Hi#5jlz~1nLJ;vCxjf$5=_&Geg2YehA7hmF{4XF@DID?{-uY?k%oWNO; z<% zW>(l+R8T8S_PQld{m!CZ1{Sc-V_d$TiXX?yhX-uL@`^Zj}= zC=^V<_08?~+FucXza^6`ni7LH66@fAL*{`?tjv=+g)3!AtMlp{<#bNt+MLdHlu@1= zg9HD!&Pg)>_4snz>RJ-t) z0kT<%rqa~P&;m=X38fp!O7*&a99cP`uJmG|yP+G#O<_iHgU2ii!!DQU2+e11a7mc6 z@65A>)Arf(ZwW22u@pfgFIeS8G>c@4<+3&7oSDp+y!=uX!5aMf z+<`BREu*+;9J*s1`tIoW^ty4)xL&+g{Mo$m;zsH1&C_!w&WehKzuKz1mSt@{)nD5`dW42`7T&4vx>WNJP^Jh+8m0?If&^T3|r!7@pkg>MF5F&7#9fXc$XU zc>WAhmXgpyiJR%jBt;W>tRqfc1AE@*N6>IDo@fTaHa2@}PyX}SFAlEFY?=GMwy$2i za`Ec&mF2r;Ws_EZp_MjCnIemouNT#2sKfQ$1&4XlmoZcMM zB|T0MT6sQ-c4B1s5GGwYio+)`nu@UDt(M0lE?+TE5|b6@@(-0HE2vB(iJ!k9g}YNB zj8nrody^kUg=}wh3c-Dq5b`?|9_T>E|AdnpaPqH0an1Zkr{n_J0SWJqfMR@h_@l!+ P3MCT*6_i3kCEou7sJa$O diff --git a/ChaCha20_Poly1305_64/sim/chacha20.yaml b/ChaCha20_Poly1305_64/sim/chacha20.yaml index 5182166..86a2401 100644 --- a/ChaCha20_Poly1305_64/sim/chacha20.yaml +++ b/ChaCha20_Poly1305_64/sim/chacha20.yaml @@ -4,3 +4,4 @@ tests: modules: - "chacha20_block" sources: "sources.list" + waves: True \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/sim/chacha20_block.py b/ChaCha20_Poly1305_64/sim/chacha20_block.py index 2ccb74e..fbd5762 100644 --- a/ChaCha20_Poly1305_64/sim/chacha20_block.py +++ b/ChaCha20_Poly1305_64/sim/chacha20_block.py @@ -1,5 +1,11 @@ import cocotb +import logging + +from chacha_helpers import chacha_block + +import struct + from cocotb.clock import Clock from cocotb.triggers import Timer, RisingEdge, FallingEdge @@ -11,11 +17,60 @@ class TB: def __init__(self, dut): self.dut = dut + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.INFO) + + cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start()) + + async def cycle_reset(self): + await self._cycle_reset(self.dut.i_rst, self.dut.i_clk) + + async def _cycle_reset(self, rst, clk): + rst.setimmediatevalue(0) + await RisingEdge(clk) + await RisingEdge(clk) + rst.value = 1 + await RisingEdge(clk) + await RisingEdge(clk) + rst.value = 0 + await RisingEdge(clk) + await RisingEdge(clk) + + + @cocotb.test async def test_sanity(dut): tb = TB(dut) + await tb.cycle_reset() + + data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0] + + data_out = chacha_block(data_in) + + tb.dut.i_counter.value = 0 + tb.dut.i_nonce.value = 0 + tb.dut.i_key.value = 0 + tb.dut.i_ready.value = 1 + tb.dut.i_valid.value = 1 await RisingEdge(tb.dut.i_clk) - await RisingEdge(tb.dut.i_clk) + tb.dut.i_valid.value = 0 + + await RisingEdge(tb.dut.o_valid) + state = tb.dut.o_state.value.integer + + state_bytes = int.to_bytes(state, 64, "little") + state_words = struct.unpack("16I", state_bytes) + + fail = False + for i, (sim_val, dut_val) in enumerate(zip(data_out, state_words)): + if sim_val != dut_val: + tb.log.info(f"{i}: {sim_val:x} -> {dut_val:x}") + fail = True + + assert not fail \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/sim/chacha_helpers.py b/ChaCha20_Poly1305_64/sim/chacha_helpers.py new file mode 100644 index 0000000..3f276ef --- /dev/null +++ b/ChaCha20_Poly1305_64/sim/chacha_helpers.py @@ -0,0 +1,42 @@ +from typing import Sequence + +def ROTL(a,b): + return (((a) << (b)) | ((a) >> (32 - (b)))) + +def QR(a, b, c, d): + a = (a + b) & 0xffffffff + d = (d ^ a) & 0xffffffff + d = ROTL(d, 16) & 0xffffffff + c = (c + d) & 0xffffffff + b = (b ^ c) & 0xffffffff + b = ROTL(b, 12) + a = (a + b) & 0xffffffff + d = (d ^ a) & 0xffffffff + d = ROTL(d, 8) & 0xffffffff + c = (c + d) & 0xffffffff + b = (b ^ c) & 0xffffffff + b = ROTL(b, 7) & 0xffffffff + + return a, b, c, d + +ROUNDS = 20 +def chacha_block(data_in: Sequence[int]) -> Sequence[int]: + # make sure to copy this list so it doesn't get modified + data = data_in[:] + + + for i in range(0, ROUNDS, 2): + data[0], data[4], data[8], data[12] = QR(data[0], data[4], data[8], data[12]) + data[1], data[5], data[9], data[13] = QR(data[1], data[5], data[9], data[13]) + data[2], data[6], data[10], data[14] = QR(data[2], data[6], data[10], data[14]) + data[3], data[7], data[11], data[15] = QR(data[3], data[7], data[11], data[15]) + + data[0], data[5], data[10], data[15] = QR(data[0], data[5], data[10], data[15]) + data[1], data[6], data[11], data[12] = QR(data[1], data[6], data[11], data[12]) + data[2], data[7], data[8], data[13] = QR(data[2], data[7], data[8], data[13]) + data[3], data[4], data[9], data[14] = QR(data[3], data[4], data[9], data[14]) + + for i in range(16): + data[i] = (data[i] + data_in[i]) & 0xffffffff + + return data \ No newline at end of file diff --git a/ChaCha20_Poly1305_64/src/chacha20_block.sv b/ChaCha20_Poly1305_64/src/chacha20_block.sv index 6fbc681..8810bda 100644 --- a/ChaCha20_Poly1305_64/src/chacha20_block.sv +++ b/ChaCha20_Poly1305_64/src/chacha20_block.sv @@ -3,6 +3,7 @@ module chacha20_block #( parameter COUNTER_SIZE = 64, parameter NONCE_SIZE = 64, parameter STATE_SIZE = 512, + parameter ROUNDS = 20, parameter CONSTANT = 128'h657870616e642033322d62797465206b )( input logic i_clk, @@ -41,16 +42,19 @@ chacha20_qr u_chacha20_``name ( \ ) -logic [31:0] state [21][16]; -logic [3:0] valid[21]; +logic [31:0] state [ROUNDS+1][16]; +logic [3:0] valid[ROUNDS+1]; // logic [3:0] ready[21]; // small fifo for storing the initial state. // better to store it in a memory than in flops -logic [4:0] initial_state_wptr; -logic [4:0] initial_state_rptr; -logic [511:0] initial_states [20]; +logic [$clog2(ROUNDS)-1:0] initial_state_wptr; +logic [$clog2(ROUNDS)-1:0] initial_state_rptr; +logic [511:0] initial_states [ROUNDS]; + +logic [511:0] state_pre_add; +logic pre_add_valid; logic [511:0] write_initial_state, read_initial_state; @@ -65,14 +69,23 @@ always_ff @(posedge i_clk) begin initial_states[initial_state_wptr] <= write_initial_state; end - if (valid[19][0]) begin + pre_add_valid <= valid[ROUNDS][0]; + + if (valid[ROUNDS][0]) begin read_initial_state <= initial_states[initial_state_rptr]; + for (int i = 0; i < 16; i++) begin + state_pre_add[i*32 +: 32] <= state[ROUNDS][i]; + end end - o_valid <= &valid[20]; + o_valid <= pre_add_valid; + + // We cannot just add state_pre_add and read_initial state + // because the addition needs to be done wordwise, with no + // carries between 32 bit groups. for (int i = 0; i < 16; i++) begin - o_state[i*32 +: 32] <= state[20][i] + read_initial_state[i*32 +: 32]; + o_state[i*32 +: 32] <= state_pre_add[i*32 +: 32] + read_initial_state[i*32 +: 32]; end end @@ -111,7 +124,7 @@ end generate - for (genvar round = 0; round < 20; round+=2) begin : ROUND_LOOP + for (genvar round = 0; round < ROUNDS; round+=2) begin : ROUND_LOOP `QR(0, round, 0, 0, 4, 8, 12); `QR(1, round, 1, 1, 5, 9, 13); `QR(2, round, 2, 2, 6, 10, 14);