First shot at 1/4 version
This commit is contained in:
@@ -98,4 +98,7 @@ Actually it's 88k luts... it's 512ff * 4 * 20 = 40k ff
|
||||
|
||||
Let's just leave it for now even if it's overkill. The hardware would support up to
|
||||
40Gbps, and technically the FPGA has 16 lanes so could do 160Gbps in total, if
|
||||
we designed a custom board for it (or 120 if we used FMC connectors).
|
||||
we designed a custom board for it (or 120 if we used FMC connectors).
|
||||
|
||||
If we only use a single quarter round multiplexed between all 4, then the same
|
||||
quarter round module can have 2 different blocks going through it at once.
|
||||
154
ChaCha20_Poly1305_64/doc/qr_pipelining.drawio
Normal file
154
ChaCha20_Poly1305_64/doc/qr_pipelining.drawio
Normal file
@@ -0,0 +1,154 @@
|
||||
<mxfile host="Electron" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/25.0.2 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36" version="25.0.2">
|
||||
<diagram name="Page-1" id="de-ffv5K_z_w-HYk_-7N">
|
||||
<mxGraphModel dx="721" dy="1186" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-1" target="ZoMeok9N2fHc0OsoVYq9-16">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-1" value="<div>Quarter Round</div>" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="200" y="60" width="80" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-2" value="state in" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="80" y="40" width="40" height="80" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-4" target="ZoMeok9N2fHc0OsoVYq9-1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-4" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;" vertex="1" parent="1">
|
||||
<mxGeometry x="115" y="65" width="80" height="30" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-6" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-2">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="170" y="100" as="sourcePoint" />
|
||||
<mxPoint x="140" y="50" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-7" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="120" y="70" as="sourcePoint" />
|
||||
<mxPoint x="140" y="70" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-8" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="120" y="90" as="sourcePoint" />
|
||||
<mxPoint x="140" y="90" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-9" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="120" y="110" as="sourcePoint" />
|
||||
<mxPoint x="140" y="110" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-12" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="340" y="49.88511627906976" as="sourcePoint" />
|
||||
<mxPoint x="360" y="49.88511627906976" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-13" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="340" y="69.88511627906988" as="sourcePoint" />
|
||||
<mxPoint x="360" y="69.88511627906988" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-14" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="340" y="89.88511627906988" as="sourcePoint" />
|
||||
<mxPoint x="360" y="89.88511627906988" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-15" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="340" y="109.88511627906988" as="sourcePoint" />
|
||||
<mxPoint x="360" y="109.88511627906988" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-16" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;flipV=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="285" y="65" width="80" height="30" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-34" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-18" target="ZoMeok9N2fHc0OsoVYq9-21">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-18" value="state out" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="360" y="40" width="40" height="80" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-20" target="ZoMeok9N2fHc0OsoVYq9-32">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-20" value="<div>Quarter Round</div>" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="560" y="60" width="80" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-21" value="state in" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="440" y="40" width="40" height="80" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-22" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-23" target="ZoMeok9N2fHc0OsoVYq9-20">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-23" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;" vertex="1" parent="1">
|
||||
<mxGeometry x="475" y="65" width="80" height="30" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-24" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-21">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="530" y="100" as="sourcePoint" />
|
||||
<mxPoint x="500" y="50" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-25" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="480" y="70" as="sourcePoint" />
|
||||
<mxPoint x="500" y="70" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-26" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="480" y="90" as="sourcePoint" />
|
||||
<mxPoint x="500" y="90" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-27" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="480" y="110" as="sourcePoint" />
|
||||
<mxPoint x="500" y="110" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-28" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="700" y="49.88511627906976" as="sourcePoint" />
|
||||
<mxPoint x="720" y="49.88511627906976" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-29" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="700" y="69.88511627906988" as="sourcePoint" />
|
||||
<mxPoint x="720" y="69.88511627906988" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-30" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="700" y="89.88511627906988" as="sourcePoint" />
|
||||
<mxPoint x="720" y="89.88511627906988" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-31" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
|
||||
<mxGeometry width="50" height="50" relative="1" as="geometry">
|
||||
<mxPoint x="700" y="109.88511627906988" as="sourcePoint" />
|
||||
<mxPoint x="720" y="109.88511627906988" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-32" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;flipV=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="645" y="65" width="80" height="30" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-33" value="state out" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="720" y="40" width="40" height="80" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="ZoMeok9N2fHc0OsoVYq9-35" value="<h1 style="margin-top: 0px;">State Blocks</h1><p>Each round shuffles the state, so we cannot simply pipeline the quarter rounds all the way through. Before each round, we must regroup all of the words into a single 512 bit state, then separate them again into the desired words to put into the quarter round. Even and odd rounds use different words, but every even round and every odd round is the same, so this can still be done in a for loop.</p><p>Odd rounds would pass in [0,4,8,12], [1,5,9,13], [2,6,10,14], then [3,7,11,15]. This means that the output of the first clock cycle is the new [0,4,8,12], however the first cycle of the next even round needs [0, 5, 10, 15], meaning we need to wait until the 4th cycle of the previous round. This is done by writing them 1 at a time to the state_out register to their respective locations in the 512 bit register. Then, when all 512 bits are ready, it gets passed in one cycle to the next block, where it is then split up again.</p><p>As it only takes 4 cycles to do a complete round, and the QR is 8 cycles deep, it will be possible to have multiple rounds in a quarter round module simultaneously.&nbsp;</p>" style="text;html=1;whiteSpace=wrap;overflow=hidden;rounded=0;" vertex="1" parent="1">
|
||||
<mxGeometry x="80" y="160" width="680" height="330" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
||||
@@ -4,4 +4,16 @@ tests:
|
||||
modules:
|
||||
- "chacha20_block"
|
||||
sources: "sources.list"
|
||||
waves: True
|
||||
- name: "chacha20_pipelined_round"
|
||||
toplevel: "chacha20_pipelined_round"
|
||||
modules:
|
||||
- "chacha20_pipelined_round"
|
||||
sources: "sources.list"
|
||||
waves: True
|
||||
- name: "chacha20_pipelined_block"
|
||||
toplevel: "chacha20_pipelined_block"
|
||||
modules:
|
||||
- "chacha20_pipelined_block"
|
||||
sources: "sources.list"
|
||||
waves: True
|
||||
119
ChaCha20_Poly1305_64/sim/chacha20_pipelined_block.py
Normal file
119
ChaCha20_Poly1305_64/sim/chacha20_pipelined_block.py
Normal file
@@ -0,0 +1,119 @@
|
||||
import cocotb
|
||||
|
||||
import logging
|
||||
|
||||
from chacha_helpers import chacha_block
|
||||
|
||||
import struct
|
||||
|
||||
import random
|
||||
|
||||
|
||||
from cocotb.clock import Clock
|
||||
from cocotb.triggers import Timer, RisingEdge, FallingEdge
|
||||
from cocotb.queue import Queue
|
||||
|
||||
CLK_PERIOD = 4
|
||||
|
||||
CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b]
|
||||
|
||||
|
||||
class TB:
    """Cocotb harness for the ``chacha20_pipelined_block`` DUT.

    Three queues decouple the test body from the pin-level drivers:

    * ``input_queue``    -- ``(key, counter, nonce)`` tuples waiting to be
      driven onto the DUT inputs by :meth:`run_input`.
    * ``expected_queue`` -- reference results computed with ``chacha_block``.
    * ``output_queue``   -- 16-word tuples captured from ``o_state`` by
      :meth:`run_output` whenever ``o_valid`` is seen high.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.INFO)

        self.input_queue = Queue()

        self.expected_queue = Queue()
        self.output_queue = Queue()

        # Park the request strobe low so the DUT never sees an undriven
        # i_valid before the first request is issued.
        self.dut.i_valid.value = 0

        cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start())

        cocotb.start_soon(self.run_input())
        cocotb.start_soon(self.run_output())

    async def cycle_reset(self):
        """Pulse the DUT's ``i_rst`` using its ``i_clk``."""
        await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)

    async def _cycle_reset(self, rst, clk):
        """Drive ``rst`` low for two edges, high for two, then low for two."""
        rst.setimmediatevalue(0)
        await RisingEdge(clk)
        await RisingEdge(clk)
        rst.value = 1
        await RisingEdge(clk)
        await RisingEdge(clk)
        rst.value = 0
        await RisingEdge(clk)
        await RisingEdge(clk)

    async def write_input(self, key, counter, nonce):
        """Queue one request for the DUT and its reference result.

        ``key`` is split into eight 32-bit words and ``counter`` / ``nonce``
        into two words each (all little endian), appended to ``CONSTANT`` to
        form the 16-word input handed to ``chacha_block``.
        """
        await self.input_queue.put((key, counter, nonce))

        data_in = CONSTANT[:]
        data_in.extend(struct.unpack("8I", key.to_bytes(32, "little")))
        data_in.extend(struct.unpack("2I", counter.to_bytes(8, "little")))
        data_in.extend(struct.unpack("2I", nonce.to_bytes(8, "little")))

        data_out = chacha_block(data_in)

        await self.expected_queue.put(data_out)

    async def run_input(self):
        """Drive queued requests onto the DUT, one accepted transfer each.

        ``i_valid`` is held high until a rising edge on which ``o_ready`` is
        also high; a single-cycle pulse would be lost whenever the DUT is
        still busy with the previous request.
        """
        while True:
            key, counter, nonce = await self.input_queue.get()

            self.dut.i_key.value = key
            self.dut.i_counter.value = counter
            self.dut.i_nonce.value = nonce
            self.dut.i_ready.value = 1
            self.dut.i_valid.value = 1

            # The value read right after the edge is the one the DUT sampled
            # on that edge, so this detects the accepting cycle.
            while True:
                await RisingEdge(self.dut.i_clk)
                if self.dut.o_ready.value:
                    break

            self.dut.i_valid.value = 0

    async def run_output(self):
        """Capture ``o_state`` as sixteen 32-bit words whenever ``o_valid`` is high."""
        while True:
            await RisingEdge(self.dut.i_clk)
            if self.dut.o_valid.value:
                state = self.dut.o_state.value.integer

                # Word i of the result lives in bits [32*i +: 32] of o_state.
                state_bytes = int.to_bytes(state, 64, "little")
                state_words = struct.unpack("16I", state_bytes)

                await self.output_queue.put(state_words)
|
||||
|
||||
@cocotb.test
async def test_sanity(dut):
    """Push blocks through the DUT and compare each word with the reference.

    The key and nonce are currently pinned to zero (the random draws are made
    but then overridden); only the block counter varies.
    """
    tb = TB(dut)
    await tb.cycle_reset()

    num_blocks = 1

    for block_idx in range(num_blocks):
        # Random values are still drawn, then replaced by the fixed debug
        # values actually used for the request.
        key = random.randint(0, 2**256-1)
        key = 0
        nonce = random.randint(0, 2**64-1)
        nonce = 0

        await tb.write_input(key, block_idx, nonce)

    mismatch_seen = False
    for _ in range(num_blocks):
        expected_words = await tb.expected_queue.get()
        observed_words = await tb.output_queue.get()

        word_idx = 0
        for expected, observed in zip(expected_words, observed_words):
            if expected != observed:
                # Report every differing word before failing at the end.
                tb.log.info(f"{word_idx}: {expected:x} -> {observed:x}")
                mismatch_seen = True
            word_idx += 1

    # Let the simulation run on a little so the waveform shows the tail.
    await Timer(1, "us")

    assert not mismatch_seen
|
||||
85
ChaCha20_Poly1305_64/sim/chacha20_pipelined_round.py
Normal file
85
ChaCha20_Poly1305_64/sim/chacha20_pipelined_round.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import cocotb
|
||||
|
||||
import logging
|
||||
|
||||
from chacha_helpers import chacha_block, QR
|
||||
|
||||
import struct
|
||||
|
||||
import random
|
||||
|
||||
|
||||
from cocotb.clock import Clock
|
||||
from cocotb.triggers import Timer, RisingEdge, FallingEdge
|
||||
from cocotb.queue import Queue
|
||||
|
||||
CLK_PERIOD = 4
|
||||
|
||||
CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b]
|
||||
|
||||
|
||||
class TB:
    """Minimal cocotb harness for ``chacha20_pipelined_round``: clock, logger and reset."""

    def __init__(self, dut):
        self.dut = dut

        logger = logging.getLogger("cocotb.tb")
        logger.setLevel(logging.INFO)
        self.log = logger

        clock = Clock(self.dut.i_clk, CLK_PERIOD, units="ns")
        cocotb.start_soon(clock.start())

    async def cycle_reset(self):
        """Pulse the DUT's ``i_rst`` using its ``i_clk``."""
        await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)

    async def _cycle_reset(self, rst, clk):
        """Hold ``rst`` low, then high, then low again, two clock edges per phase."""
        rst.setimmediatevalue(0)

        # Each phase waits two rising edges and then switches to the next level.
        for next_level in (1, 0):
            await RisingEdge(clk)
            await RisingEdge(clk)
            rst.value = next_level

        await RisingEdge(clk)
        await RisingEdge(clk)
|
||||
|
||||
@cocotb.test
async def test_sanity(dut):
    """Feed one state into the round DUT and log the reference result.

    The reference applies the four diagonal quarter rounds to a state whose
    word 0 is 1 and all other words are 0, and logs the resulting words.
    NOTE(review): the DUT output is not compared against the reference yet;
    the result has to be checked in the waveform.
    """
    tb = TB(dut)

    await tb.cycle_reset()

    # Alternative stimulus kept for reference:
    # data_in = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b,
    #            0, 0, 0, 0,
    #            0, 0, 0, 0,
    #            0, 0, 0, 0]

    data = [1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0]

    # Column variant of the round, kept for reference:
    # data[0], data[4], data[8], data[12] = QR(data[0], data[4], data[8], data[12])
    # data[1], data[5], data[9], data[13] = QR(data[1], data[5], data[9], data[13])
    # data[2], data[6], data[10], data[14] = QR(data[2], data[6], data[10], data[14])
    # data[3], data[7], data[11], data[15] = QR(data[3], data[7], data[11], data[15])

    # Diagonal variant of the round.
    data[0], data[5], data[10], data[15] = QR(data[0], data[5], data[10], data[15])
    data[1], data[6], data[11], data[12] = QR(data[1], data[6], data[11], data[12])
    data[2], data[7], data[8], data[13] = QR(data[2], data[7], data[8], data[13])
    data[3], data[4], data[9], data[14] = QR(data[3], data[4], data[9], data[14])

    for val in data:
        tb.log.info(f"{val:x}")

    tb.dut.i_ready.value = 1

    # i_state = 1 sets word 0 to 1 and every other word to 0, matching `data`
    # before the quarter rounds were applied.
    tb.dut.i_state.value = 1
    # tb.dut.i_state.value = 0x657870616e642033322d62797465206b

    # Present the state for a single clock edge, then drop the request so the
    # DUT is not handed the same state again.
    tb.dut.i_valid.value = 1
    await RisingEdge(tb.dut.i_clk)
    tb.dut.i_valid.value = 0

    await Timer(1, "us")
|
||||
137
ChaCha20_Poly1305_64/src/chacha20_pipelined_block.sv
Normal file
137
ChaCha20_Poly1305_64/src/chacha20_pipelined_block.sv
Normal file
@@ -0,0 +1,137 @@
|
||||
// ChaCha20 block function built from a chain of ROUNDS round stages.
//
// The 512 bit input state is assembled from CONSTANT, i_key, i_counter and
// i_nonce, sent through alternating chacha20_pipelined_round instances
// (IS_EVEN=0 first, then IS_EVEN=1), and finally added word-wise to the
// original input state, which waits in a small FIFO while the rounds run.
//
// Handshake:
//   i_valid / o_ready : a request is accepted on a clock edge where both are
//                       high; only then is its initial state stored.
//   o_valid / i_ready : i_ready is forwarded to the last round stage. When
//                       that stage hands over its result, o_valid follows as
//                       a one-cycle strobe two clock edges later together
//                       with o_state. The two output registers themselves are
//                       not back-pressured.
//
// The generate loop instantiates the rounds in pairs, so ROUNDS is expected
// to be even.
module chacha20_pipelined_block #(
    parameter KEY_SIZE = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE = 64,
    parameter STATE_SIZE = 512,
    parameter ROUNDS = 20,
    parameter CONSTANT = 128'h657870616e642033322d62797465206b
)(
    input  logic i_clk,
    input  logic i_rst,

    input  logic [KEY_SIZE-1:0] i_key,
    input  logic [COUNTER_SIZE-1:0] i_counter,
    input  logic [NONCE_SIZE-1:0] i_nonce,
    input  logic i_valid,
    output logic o_ready,

    output logic [STATE_SIZE-1:0] o_state,
    output logic o_valid,
    input  logic i_ready
);

    // each round is 8 stages
    localparam PIPE_STAGES = ROUNDS * 8;

    // state/valid/ready[n] sit between round n-1 and round n;
    // index 0 is the block input, index ROUNDS the output of the last round.
    logic [511:0] state [ROUNDS+1];
    logic valid[ROUNDS+1];
    logic ready[ROUNDS+1];

    // small fifo for storing the initial state.
    // better to store it in a memory than in flops
    logic [$clog2(PIPE_STAGES)-1:0] initial_state_wptr;
    logic [$clog2(PIPE_STAGES)-1:0] initial_state_rptr;
    logic [511:0] initial_states [PIPE_STAGES];

    logic [511:0] state_pre_add;
    logic pre_add_valid;

    logic [511:0] write_initial_state, read_initial_state;

    logic [511:0] original_initial_state;

    // High on the cycle in which the last round hands over a finished state.
    logic last_round_xfer;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            initial_state_rptr <= '0;
            initial_state_wptr <= '0;

            // Keep the output strobes defined while in reset.
            pre_add_valid <= 1'b0;
            o_valid <= 1'b0;
        end else begin
            // Store the initial state only for requests the first round
            // actually accepts; otherwise the FIFO would fall out of step
            // with the states travelling through the rounds.
            if (i_valid && ready[0]) begin
                initial_states[initial_state_wptr] <= write_initial_state;
                if (initial_state_wptr < PIPE_STAGES-1) begin
                    initial_state_wptr <= initial_state_wptr + 1;
                end else begin
                    initial_state_wptr <= '0;
                end
            end

            pre_add_valid <= last_round_xfer;

            // Pop exactly one initial state per finished state. The last
            // round holds its valid until it sees ready, so valid alone is
            // not a one-shot event.
            if (last_round_xfer) begin
                read_initial_state <= initial_states[initial_state_rptr];
                if (initial_state_rptr < PIPE_STAGES-1) begin
                    initial_state_rptr <= initial_state_rptr + 1;
                end else begin
                    initial_state_rptr <= '0;
                end
                state_pre_add <= state[ROUNDS];
            end

            o_valid <= pre_add_valid;

            // We cannot just add state_pre_add and read_initial state
            // because the addition needs to be done wordwise, with no
            // carries between 32 bit groups.
            for (int i = 0; i < 16; i++) begin
                o_state[i*32 +: 32] <= state_pre_add[i*32 +: 32] + read_initial_state[i*32 +: 32];
            end
        end
    end

    always_comb begin
        // CONSTANT is written big endian (its first word sits in bits
        // [127:96]), so state word i takes CONSTANT word (3-i).
        for (int i = 0; i < 4; i++) begin
            state[0][32*i +: 32] = CONSTANT[32*(3-i) +: 32];
        end

        for (int i = 0; i < 8; i++) begin
            state[0][32*(i+4) +: 32] = i_key[32*i +: 32];
        end

        state[0][12*32 +: 32] = i_counter[0 +: 32];
        state[0][13*32 +: 32] = i_counter[32 +: 32];

        state[0][14*32 +: 32] = i_nonce[0 +: 32];
        state[0][15*32 +: 32] = i_nonce[32 +: 32];

        valid[0] = i_valid;

        o_ready = ready[0];

        // Close the handshake of the last round stage with the consumer's
        // ready; without a driver here the last stage could never leave its
        // output state.
        ready[ROUNDS] = i_ready;
        last_round_xfer = valid[ROUNDS] && ready[ROUNDS];

        write_initial_state = state[0];
        original_initial_state = read_initial_state;
    end

    generate
        for (genvar round = 0; round < ROUNDS; round+=2) begin : ROUND_LOOP
            chacha20_pipelined_round #(.IS_EVEN(0)) chacha20_pipelined_round_inst_odd (
                .i_clk      (i_clk),
                .i_rst      (i_rst),

                .i_state    (state[round]),
                .i_valid    (valid[round]),
                .o_ready    (ready[round]),

                .o_state    (state[round+1]),
                .o_valid    (valid[round+1]),
                .i_ready    (ready[round+1])
            );

            chacha20_pipelined_round #(.IS_EVEN(1)) chacha20_pipelined_round_inst_even (
                .i_clk      (i_clk),
                .i_rst      (i_rst),

                .i_state    (state[round+1]),
                .i_valid    (valid[round+1]),
                .o_ready    (ready[round+1]),

                .o_state    (state[round+2]),
                .o_valid    (valid[round+2]),
                .i_ready    (ready[round+2])
            );
        end
    endgenerate

endmodule
|
||||
219
ChaCha20_Poly1305_64/src/chacha20_pipelined_round.sv
Normal file
219
ChaCha20_Poly1305_64/src/chacha20_pipelined_round.sv
Normal file
@@ -0,0 +1,219 @@
|
||||
// One ChaCha20 round built around a single, time-multiplexed quarter round.
//
// An accepted 512 bit state is split into sixteen 32 bit words. Over four
// steps the input FSM feeds one group of four words per step into the
// chacha20_qr instance; the output FSM writes the four results of each step
// back to the same word positions of state_out. Once all four groups have
// been written, the reassembled state is offered on o_state.
//
// IS_EVEN selects the word grouping:
//   IS_EVEN = 0 : columns   (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
//   IS_EVEN = 1 : diagonals (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
//
// Handshake: i_valid/o_ready on the input side, o_valid/i_ready on the
// output side. o_ready is high only while the input FSM waits for a state,
// o_valid only while a complete state is waiting to be taken.
module chacha20_pipelined_round #(
    parameter IS_EVEN = 1,
    parameter KEY_SIZE = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE = 64,
    parameter STATE_SIZE = 512,
    parameter ROUNDS = 20,
    parameter CONSTANT = 128'h657870616e642033322d62797465206b
)(
    input  logic i_clk,
    input  logic i_rst,

    input  logic [STATE_SIZE-1:0] i_state,
    input  logic i_valid,
    output logic o_ready,

    output logic [STATE_SIZE-1:0] o_state,
    output logic o_valid,
    input  logic i_ready
);

    logic [31:0] state_in [16];
    logic [31:0] state_out [16];

    logic [31:0] qr_a_i, qr_a_o;
    logic [31:0] qr_b_i, qr_b_o;
    logic [31:0] qr_c_i, qr_c_o;
    logic [31:0] qr_d_i, qr_d_o;

    logic qr_i_valid, qr_o_valid;
    logic qr_o_ready, qr_i_ready;

    // The same enum type is used by the in fsm and the out fsm.
    // The in fsm uses STATE_IN/STATE_SHIFT, the out fsm STATE_SHIFT/STATE_OUT.
    enum logic [1:0] {STATE_IN, STATE_SHIFT, STATE_OUT} in_fsm_state, out_fsm_state;
    logic [1:0] in_counter;
    logic [1:0] out_counter;

    // Select the four words presented to the quarter round.
    `define QR_IN(a, b, c, d) \
        qr_a_i = state_in[a]; \
        qr_b_i = state_in[b]; \
        qr_c_i = state_in[c]; \
        qr_d_i = state_in[d]

    // Store the four quarter round results at their word positions.
    `define QR_OUT(a, b, c, d) \
        state_out[a] <= qr_a_o; \
        state_out[b] <= qr_b_o; \
        state_out[c] <= qr_c_o; \
        state_out[d] <= qr_d_o

    always_comb begin
        // The word groups selected here must match the groups written back
        // for the same counter value in the output FSM below.
        if (IS_EVEN) begin
            case (in_counter)
                0: begin
                    `QR_IN(0, 5, 10, 15);
                end

                1: begin
                    `QR_IN(1, 6, 11, 12);
                end

                2: begin
                    `QR_IN(2, 7, 8, 13);
                end

                3: begin
                    `QR_IN(3, 4, 9, 14);
                end
            endcase
        end else begin
            case (in_counter)
                0: begin
                    `QR_IN(0, 4, 8, 12);
                end

                1: begin
                    `QR_IN(1, 5, 9, 13);
                end

                2: begin
                    `QR_IN(2, 6, 10, 14);
                end

                3: begin
                    `QR_IN(3, 7, 11, 15);
                end
            endcase
        end

        // Repack the result words into the flat output vector.
        for (int i = 0; i < 16; i++) begin
            o_state[32*i +: 32] = state_out[i];
        end
    end

    chacha20_qr u_chacha20_qr (
        .i_clk      (i_clk),
        .i_rst      (i_rst),

        .i_valid    (qr_i_valid),
        .o_ready    (qr_o_ready),
        .a_i        (qr_a_i),
        .b_i        (qr_b_i),
        .c_i        (qr_c_i),
        .d_i        (qr_d_i),

        .o_valid    (qr_o_valid),
        .i_ready    (qr_i_ready),
        .a_o        (qr_a_o),
        .b_o        (qr_b_o),
        .c_o        (qr_c_o),
        .d_o        (qr_d_o)
    );

    assign o_ready = in_fsm_state == STATE_IN;
    assign qr_i_valid = in_fsm_state == STATE_SHIFT;

    assign qr_i_ready = out_fsm_state == STATE_SHIFT;
    assign o_valid = out_fsm_state == STATE_OUT;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            in_fsm_state <= STATE_IN;
            out_fsm_state <= STATE_SHIFT;

            in_counter <= '0;
            out_counter <= '0;
        end else begin
            // Input side: accept a state, then feed four groups to the QR.
            case (in_fsm_state)
                STATE_IN: begin
                    if (i_valid) begin
                        // Register incoming state as unpacked 32 bit words.
                        // Nonblocking, like every other register in this
                        // clocked block.
                        for (int i = 0; i < 16; i++) begin
                            state_in[i] <= i_state[32*i +: 32];
                        end

                        in_counter <= '0;
                        in_fsm_state <= STATE_SHIFT;
                    end
                end

                STATE_SHIFT: begin
                    if (qr_o_ready) begin
                        in_counter <= in_counter + 1;

                        // The fourth group has been handed over.
                        if (in_counter == 3) begin
                            in_fsm_state <= STATE_IN;
                        end
                    end
                end

                default: begin
                    in_fsm_state <= STATE_IN;
                end
            endcase

            // Output side: collect four result groups, then offer the state.
            case (out_fsm_state)
                STATE_SHIFT: begin
                    if (qr_o_valid) begin
                        if (IS_EVEN) begin
                            case (out_counter)
                                0: begin
                                    `QR_OUT(0, 5, 10, 15);
                                end

                                1: begin
                                    `QR_OUT(1, 6, 11, 12);
                                end

                                2: begin
                                    `QR_OUT(2, 7, 8, 13);
                                end

                                3: begin
                                    `QR_OUT(3, 4, 9, 14);
                                end
                            endcase
                        end else begin
                            case (out_counter)
                                0: begin
                                    `QR_OUT(0, 4, 8, 12);
                                end

                                1: begin
                                    `QR_OUT(1, 5, 9, 13);
                                end

                                2: begin
                                    `QR_OUT(2, 6, 10, 14);
                                end

                                3: begin
                                    `QR_OUT(3, 7, 11, 15);
                                end
                            endcase
                        end

                        out_counter <= out_counter + 1;

                        // All sixteen words written: present the state.
                        if (out_counter == 3) begin
                            out_fsm_state <= STATE_OUT;
                        end
                    end
                end

                STATE_OUT: begin
                    if (i_ready) begin
                        out_fsm_state <= STATE_SHIFT;
                    end
                end

                default: begin
                    out_fsm_state <= STATE_SHIFT;
                end
            endcase
        end
    end

    // Keep the helper macros from leaking into later compilation units.
    `undef QR_IN
    `undef QR_OUT

endmodule
|
||||
@@ -1,2 +1,4 @@
|
||||
chacha20_qr.sv
|
||||
chacha20_block.sv
|
||||
chacha20_block.sv
|
||||
chacha20_pipelined_round.sv
|
||||
chacha20_pipelined_block.sv
|
||||
Reference in New Issue
Block a user