First shot at 1/4 version

This commit is contained in:
Byron Lathi
2025-07-02 06:32:58 -07:00
parent 196ea8e6d3
commit a617277005
8 changed files with 733 additions and 2 deletions

View File

@@ -98,4 +98,7 @@ Actualyl its 88k luts... its 512ff * 4 * 20 = 40k ff
Lets just leave it for now even if its overkill. The hardware would support up to
40Gbps, and technically the FPGA has 16 lanes so could do 160Gbps in total, if
we designed a custom board for it (or 120 if we used FMC connectors).
we designed a custom board for it (or 120 if we used FMC connectors).
If we only use a single quarter round multiplexed between all 4, then the same
quarter round module can have 2 different blocks going through it at once.

View File

@@ -0,0 +1,154 @@
<mxfile host="Electron" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/25.0.2 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36" version="25.0.2">
<diagram name="Page-1" id="de-ffv5K_z_w-HYk_-7N">
<mxGraphModel dx="721" dy="1186" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="ZoMeok9N2fHc0OsoVYq9-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-1" target="ZoMeok9N2fHc0OsoVYq9-16">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-1" value="&lt;div&gt;Quarter Round&lt;/div&gt;" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="200" y="60" width="80" height="40" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-2" value="state in" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="80" y="40" width="40" height="80" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-4" target="ZoMeok9N2fHc0OsoVYq9-1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-4" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;" vertex="1" parent="1">
<mxGeometry x="115" y="65" width="80" height="30" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-6" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-2">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="170" y="100" as="sourcePoint" />
<mxPoint x="140" y="50" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-7" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="120" y="70" as="sourcePoint" />
<mxPoint x="140" y="70" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-8" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="120" y="90" as="sourcePoint" />
<mxPoint x="140" y="90" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-9" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="120" y="110" as="sourcePoint" />
<mxPoint x="140" y="110" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-12" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="340" y="49.88511627906976" as="sourcePoint" />
<mxPoint x="360" y="49.88511627906976" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-13" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="340" y="69.88511627906988" as="sourcePoint" />
<mxPoint x="360" y="69.88511627906988" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-14" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="340" y="89.88511627906988" as="sourcePoint" />
<mxPoint x="360" y="89.88511627906988" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-15" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="340" y="109.88511627906988" as="sourcePoint" />
<mxPoint x="360" y="109.88511627906988" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-16" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;flipV=1;" vertex="1" parent="1">
<mxGeometry x="285" y="65" width="80" height="30" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-34" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-18" target="ZoMeok9N2fHc0OsoVYq9-21">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-18" value="state out" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="360" y="40" width="40" height="80" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-20" target="ZoMeok9N2fHc0OsoVYq9-32">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-20" value="&lt;div&gt;Quarter Round&lt;/div&gt;" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="560" y="60" width="80" height="40" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-21" value="state in" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="440" y="40" width="40" height="80" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-22" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-23" target="ZoMeok9N2fHc0OsoVYq9-20">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-23" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;" vertex="1" parent="1">
<mxGeometry x="475" y="65" width="80" height="30" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-24" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="ZoMeok9N2fHc0OsoVYq9-21">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="530" y="100" as="sourcePoint" />
<mxPoint x="500" y="50" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-25" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="480" y="70" as="sourcePoint" />
<mxPoint x="500" y="70" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-26" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="480" y="90" as="sourcePoint" />
<mxPoint x="500" y="90" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-27" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="480" y="110" as="sourcePoint" />
<mxPoint x="500" y="110" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-28" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="700" y="49.88511627906976" as="sourcePoint" />
<mxPoint x="720" y="49.88511627906976" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-29" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="700" y="69.88511627906988" as="sourcePoint" />
<mxPoint x="720" y="69.88511627906988" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-30" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="700" y="89.88511627906988" as="sourcePoint" />
<mxPoint x="720" y="89.88511627906988" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-31" value="" style="endArrow=classic;html=1;rounded=0;exitX=1;exitY=0.125;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="700" y="109.88511627906988" as="sourcePoint" />
<mxPoint x="720" y="109.88511627906988" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-32" value="" style="shape=trapezoid;perimeter=trapezoidPerimeter;whiteSpace=wrap;html=1;fixedSize=1;rotation=90;flipV=1;" vertex="1" parent="1">
<mxGeometry x="645" y="65" width="80" height="30" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-33" value="state out" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
<mxGeometry x="720" y="40" width="40" height="80" as="geometry" />
</mxCell>
<mxCell id="ZoMeok9N2fHc0OsoVYq9-35" value="&lt;h1 style=&quot;margin-top: 0px;&quot;&gt;State Blocks&lt;/h1&gt;&lt;p&gt;Each round shuffles the state, so we cannot simply pipeline the quarter rounds all the way through. Before each round, we must regroup all of the words into a single 512 bit state, then separate them again into the desired words to put into the quarter round. Even and odd rounds use different words, but every even round and every odd round is the same, so this can still be done in a for loop.&lt;/p&gt;&lt;p&gt;Odd rounds would pass in [0,4,8,12], [1,5,9,13], [2,6,10,14], then [3,7,11,15]. This means that the output of the first clock cycle is the new [0,4,8,12], however the first cycle of the next even round needs [0, 5, 10, 15], meaning we need to wait until the 4th cycle of the previous round. This is done by writing them 1 at a time to the state_out register to their respective locations in the 512 bit register. Then, when all 512 bits are ready, it gets passed in one cycle to the next block, where it is then split up again.&lt;/p&gt;&lt;p&gt;As it only takes 4 cycles to do a complete round, and the QR is 8 cycles deep, it will be possible to have multiple rounds in a quarter round module simultaneously.&amp;nbsp;&lt;/p&gt;" style="text;html=1;whiteSpace=wrap;overflow=hidden;rounded=0;" vertex="1" parent="1">
<mxGeometry x="80" y="160" width="680" height="330" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

View File

@@ -4,4 +4,16 @@ tests:
modules:
- "chacha20_block"
sources: "sources.list"
waves: True
- name: "chacha20_pipelined_round"
toplevel: "chacha20_pipelined_round"
modules:
- "chacha20_pipelined_round"
sources: "sources.list"
waves: True
- name: "chacha20_pipelined_block"
toplevel: "chacha20_pipelined_block"
modules:
- "chacha20_pipelined_block"
sources: "sources.list"
waves: True

View File

@@ -0,0 +1,119 @@
import cocotb
import logging
from chacha_helpers import chacha_block
import struct
import random
from cocotb.clock import Clock
from cocotb.triggers import Timer, RisingEdge, FallingEdge
from cocotb.queue import Queue
CLK_PERIOD = 4
CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b]
class TB:
    """Cocotb harness for the ``chacha20_pipelined_block`` DUT.

    Starts the clock plus two background tasks: ``run_input`` drives
    queued (key, counter, nonce) tuples into the DUT, and ``run_output``
    captures every state the DUT presents with ``o_valid`` high.
    Reference results from ``chacha_block`` are queued in
    ``expected_queue`` in the same order the inputs were submitted.
    """

    def __init__(self, dut):
        self.dut = dut

        self.log = logging.getLogger("cocotb.tb")
        self.log.setLevel(logging.INFO)

        # (key, counter, nonce) tuples waiting to be driven into the DUT.
        self.input_queue = Queue()
        # Reference-model output for each submitted input, in order.
        self.expected_queue = Queue()
        # 16-word tuples captured from o_state.
        self.output_queue = Queue()

        cocotb.start_soon(Clock(self.dut.i_clk, CLK_PERIOD, units="ns").start())
        cocotb.start_soon(self.run_input())
        cocotb.start_soon(self.run_output())

    async def cycle_reset(self):
        """Pulse the DUT's ``i_rst`` using its ``i_clk``."""
        await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)

    async def _cycle_reset(self, rst, clk):
        """Hold ``rst`` low for 2 edges, high for 2 edges, then low for 2 edges."""
        rst.setimmediatevalue(0)
        await RisingEdge(clk)
        await RisingEdge(clk)
        rst.value = 1
        await RisingEdge(clk)
        await RisingEdge(clk)
        rst.value = 0
        await RisingEdge(clk)
        await RisingEdge(clk)

    async def write_input(self, key, counter, nonce):
        """Queue one block request and its expected result.

        ``key`` is a 256-bit integer; ``counter`` and ``nonce`` are 64-bit
        integers. The reference input is the 4 constant words followed by
        the key, counter and nonce split into 32-bit words, least
        significant word first.
        """
        await self.input_queue.put((key, counter, nonce))

        data_in = CONSTANT[:]
        # "<" pins the word byte order to little-endian so it always matches
        # the "little" used by to_bytes, independent of the host CPU.
        data_in.extend(struct.unpack("<8I", key.to_bytes(32, "little")))
        data_in.extend(struct.unpack("<2I", counter.to_bytes(8, "little")))
        data_in.extend(struct.unpack("<2I", nonce.to_bytes(8, "little")))

        data_out = chacha_block(data_in)
        await self.expected_queue.put(data_out)

    async def run_input(self):
        """Background task: drive queued inputs into the DUT one at a time."""
        while True:
            key, counter, nonce = await self.input_queue.get()

            self.dut.i_key.value = key
            self.dut.i_counter.value = counter
            self.dut.i_nonce.value = nonce
            self.dut.i_ready.value = 1
            self.dut.i_valid.value = 1

            # Hold i_valid until a clock edge at which the DUT also had
            # o_ready high; otherwise an input offered while the DUT is busy
            # would be dropped silently.
            while True:
                await RisingEdge(self.dut.i_clk)
                if self.dut.o_ready.value:
                    break

            self.dut.i_valid.value = 0

    async def run_output(self):
        """Background task: capture o_state as 16 words whenever o_valid is high."""
        while True:
            await RisingEdge(self.dut.i_clk)
            if self.dut.o_valid.value:
                state = self.dut.o_state.value.integer
                state_bytes = int.to_bytes(state, 64, "little")
                # Explicit little-endian: word 0 is o_state[31:0].
                state_words = struct.unpack("<16I", state_bytes)
                await self.output_queue.put(state_words)
# The timeout turns a DUT that never raises o_valid into a test failure
# instead of a simulation that runs forever on the free-running clock.
@cocotb.test(timeout_time=100, timeout_unit="us")
async def test_sanity(dut):
    """Push ``count`` blocks through the DUT and compare with the reference model.

    Each block uses a random 256-bit key, a random 64-bit nonce and the
    loop index as the counter. Every mismatching 32-bit word is logged
    as ``index: expected -> actual`` and the test fails at the end if any
    word differed.
    """
    tb = TB(dut)

    await tb.cycle_reset()

    count = 1

    for i in range(count):
        # Previously these random values were immediately overwritten with 0,
        # so only the all-zero key/nonce vector was ever exercised.
        key = random.randint(0, 2**256-1)
        counter = i
        nonce = random.randint(0, 2**64-1)
        tb.log.info(f"block {i}: key={key:x} counter={counter:x} nonce={nonce:x}")
        await tb.write_input(key, counter, nonce)

    fail = False

    for _ in range(count):
        sim_vals = await tb.expected_queue.get()
        dut_vals = await tb.output_queue.get()

        for i, (sim_val, dut_val) in enumerate(zip(sim_vals, dut_vals)):
            if sim_val != dut_val:
                # Log at error level so mismatches stand out in the run log.
                tb.log.error(f"{i}: {sim_val:x} -> {dut_val:x}")
                fail = True

    # Leave some trailing time in the waveform after the last result.
    await Timer(1, "us")

    assert not fail

View File

@@ -0,0 +1,85 @@
import cocotb
import logging
from chacha_helpers import chacha_block, QR
import struct
import random
from cocotb.clock import Clock
from cocotb.triggers import Timer, RisingEdge, FallingEdge
from cocotb.queue import Queue
CLK_PERIOD = 4
CONSTANT = [0x65787061, 0x6e642033, 0x322d6279, 0x7465206b]
class TB:
    """Minimal cocotb harness for the ``chacha20_pipelined_round`` DUT.

    Creates a logger and starts a free-running clock on ``i_clk``;
    stimulus is driven directly by the test.
    """

    def __init__(self, dut):
        self.dut = dut

        logger = logging.getLogger("cocotb.tb")
        logger.setLevel(logging.INFO)
        self.log = logger

        clock = Clock(self.dut.i_clk, CLK_PERIOD, units="ns")
        cocotb.start_soon(clock.start())

    async def cycle_reset(self):
        """Run the reset sequence on the DUT's ``i_rst`` / ``i_clk``."""
        await self._cycle_reset(self.dut.i_rst, self.dut.i_clk)

    async def _cycle_reset(self, rst, clk):
        """Drive ``rst`` low, high, then low again, two clock edges per phase."""
        rst.setimmediatevalue(0)

        # Each new level is applied after two rising edges of the previous one.
        for level in (1, 0):
            await RisingEdge(clk)
            await RisingEdge(clk)
            rst.value = level

        # Two more edges with reset released before returning.
        await RisingEdge(clk)
        await RisingEdge(clk)
@cocotb.test
async def test_sanity(dut):
    """Send one state through the round module and check the result.

    The input state has word 0 equal to 1 and all other words 0. The
    expected output is computed with ``QR`` over the four diagonals,
    which is the round selected by the module's default ``IS_EVEN=1``.
    """
    tb = TB(dut)

    await tb.cycle_reset()

    # Alternative stimulus: the four constant words
    # 0x65787061, 0x6e642033, 0x322d6279, 0x7465206b followed by twelve zeros
    # (i_state = 0x657870616e642033322d62797465206b).
    data = [1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0]

    # Diagonal round. For a column round (IS_EVEN=0) the groups would be
    # (0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15).
    diagonals = ((0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14))
    for a, b, c, d in diagonals:
        data[a], data[b], data[c], data[d] = QR(data[a], data[b], data[c], data[d])

    for val in data:
        tb.log.info(f"{val:x}")

    tb.dut.i_ready.value = 1
    tb.dut.i_state.value = 1
    tb.dut.i_valid.value = 1
    await RisingEdge(tb.dut.i_clk)
    # Deassert after one cycle; leaving i_valid high would make the module
    # reload the same state every time it returns to its input phase.
    tb.dut.i_valid.value = 0

    # Watch for o_valid for at most 1 us of clock cycles, so a DUT that never
    # produces a result fails the test instead of passing silently.
    dut_words = None
    for _ in range(1000 // CLK_PERIOD):
        await RisingEdge(tb.dut.i_clk)
        if tb.dut.o_valid.value:
            state = tb.dut.o_state.value.integer
            # Word 0 is o_state[31:0].
            dut_words = struct.unpack("<16I", state.to_bytes(64, "little"))
            break

    assert dut_words is not None, "DUT never asserted o_valid"

    fail = False
    for i, (sim_val, dut_val) in enumerate(zip(data, dut_words)):
        if sim_val != dut_val:
            tb.log.error(f"{i}: {sim_val:x} -> {dut_val:x}")
            fail = True

    assert not fail

View File

@@ -0,0 +1,137 @@
// ChaCha20 block function built from a chain of ROUNDS chacha20_pipelined_round
// instances (alternating IS_EVEN=0 / IS_EVEN=1), followed by the final
// word-wise addition of the initial state.
//
// Input side : i_key / i_counter / i_nonce are accepted when i_valid && o_ready.
// Output side: o_state is valid when o_valid is high; i_ready back-pressures
//              the output stage and, through it, the round chain.
//
// ROUNDS must be even because rounds are instantiated in pairs.
module chacha20_pipelined_block #(
    parameter KEY_SIZE = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE = 64,
    parameter STATE_SIZE = 512,
    parameter ROUNDS = 20,
    parameter CONSTANT = 128'h657870616e642033322d62797465206b
)(
    input  logic                    i_clk,
    input  logic                    i_rst,

    input  logic [KEY_SIZE-1:0]     i_key,
    input  logic [COUNTER_SIZE-1:0] i_counter,
    input  logic [NONCE_SIZE-1:0]   i_nonce,
    input  logic                    i_valid,
    output logic                    o_ready,

    output logic [STATE_SIZE-1:0]   o_state,
    output logic                    o_valid,
    input  logic                    i_ready
);

    // each round is 8 stages
    localparam PIPE_STAGES = ROUNDS * 8;

    // state/valid/ready[n] form the link between round n-1 and round n.
    // Index 0 is the block input, index ROUNDS feeds the final adder.
    logic [511:0] state [ROUNDS+1];
    logic valid[ROUNDS+1];
    logic ready[ROUNDS+1];

    // small fifo for storing the initial state.
    // better to store it in a memory than in flops
    logic [$clog2(PIPE_STAGES)-1:0] initial_state_wptr;
    logic [$clog2(PIPE_STAGES)-1:0] initial_state_rptr;
    logic [511:0] initial_states [PIPE_STAGES];

    logic [511:0] state_pre_add;
    logic pre_add_valid;

    logic [511:0] write_initial_state, read_initial_state;
    // Alias of read_initial_state; not referenced elsewhere in this module.
    logic [511:0] original_initial_state;

    // Ready signals for the two registered output stages
    // (pre-add capture, then adder/output register).
    logic pre_add_ready;
    logic out_stage_ready;

    // A stage can take new data when it is empty or when the stage after it
    // is draining in the same cycle.
    assign out_stage_ready = !o_valid || i_ready;
    assign pre_add_ready   = !pre_add_valid || out_stage_ready;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            initial_state_rptr <= '0;
            initial_state_wptr <= '0;
            // Clear the valid flags so no stale result is reported after reset.
            pre_add_valid <= 1'b0;
            o_valid <= 1'b0;
        end else begin
            // Push the initial state only when the first round actually
            // accepts it, so a held i_valid cannot enqueue duplicates.
            if (i_valid && o_ready) begin
                initial_states[initial_state_wptr] <= write_initial_state;
                if (initial_state_wptr < PIPE_STAGES-1) begin
                    initial_state_wptr <= initial_state_wptr + 1;
                end else begin
                    initial_state_wptr <= '0;
                end
            end

            // Stage 1: capture the last round's output together with the
            // matching initial state. ready[ROUNDS] equals pre_add_ready, so
            // this captures exactly once per accepted transfer.
            if (pre_add_ready) begin
                pre_add_valid <= valid[ROUNDS];

                if (valid[ROUNDS]) begin
                    read_initial_state <= initial_states[initial_state_rptr];
                    if (initial_state_rptr < PIPE_STAGES-1) begin
                        initial_state_rptr <= initial_state_rptr + 1;
                    end else begin
                        initial_state_rptr <= '0;
                    end
                    state_pre_add <= state[ROUNDS];
                end
            end

            // Stage 2: final addition. Held while the consumer is not ready.
            if (out_stage_ready) begin
                o_valid <= pre_add_valid;

                // We cannot just add state_pre_add and read_initial state
                // because the addition needs to be done wordwise, with no
                // carries between 32 bit groups.
                for (int i = 0; i < 16; i++) begin
                    o_state[i*32 +: 32] <= state_pre_add[i*32 +: 32] + read_initial_state[i*32 +: 32];
                end
            end
        end
    end

    always_comb begin
        // constant is big endian: state word 0 is the most significant
        // 32 bits of CONSTANT, so the word order is reversed here.
        for (int i = 0; i < 4; i++) begin
            state[0][32*i +: 32] = CONSTANT[32*(3-i) +: 32];
        end

        // Words 4..11: key, least significant word first.
        for (int i = 0; i < 8; i++) begin
            state[0][32*(i+4) +: 32] = i_key[32*i +: 32];
        end

        // Words 12..13: counter, words 14..15: nonce.
        state[0][12*32 +: 32] = i_counter[0 +: 32];
        state[0][13*32 +: 32] = i_counter[32 +: 32];

        state[0][14*32 +: 32] = i_nonce[0 +: 32];
        state[0][15*32 +: 32] = i_nonce[32 +: 32];

        valid[0] = i_valid;
        o_ready = ready[0];

        // Back-pressure into the last round; this was previously undriven.
        ready[ROUNDS] = pre_add_ready;

        write_initial_state = state[0];
        original_initial_state = read_initial_state;
    end

    generate
        // Two rounds per iteration: IS_EVEN=0 first, then IS_EVEN=1.
        for (genvar round = 0; round < ROUNDS; round+=2) begin : ROUND_LOOP
            chacha20_pipelined_round #(.IS_EVEN(0)) chacha20_pipelined_round_inst_odd (
                .i_clk      (i_clk),
                .i_rst      (i_rst),

                .i_state    (state[round]),
                .i_valid    (valid[round]),
                .o_ready    (ready[round]),

                .o_state    (state[round+1]),
                .o_valid    (valid[round+1]),
                .i_ready    (ready[round+1])
            );

            chacha20_pipelined_round #(.IS_EVEN(1)) chacha20_pipelined_round_inst_even (
                .i_clk      (i_clk),
                .i_rst      (i_rst),

                .i_state    (state[round+1]),
                .i_valid    (valid[round+1]),
                .o_ready    (ready[round+1]),

                .o_state    (state[round+2]),
                .o_valid    (valid[round+2]),
                .i_ready    (ready[round+2])
            );
        end
    endgenerate

endmodule

View File

@@ -0,0 +1,219 @@
// One ChaCha20 round that time-multiplexes a single chacha20_qr instance
// over the four quarter rounds of the round.
//
// IS_EVEN = 0: quarter rounds on (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15)
// IS_EVEN = 1: quarter rounds on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14)
//
// Input FSM : STATE_IN latches i_state when i_valid is high, then STATE_SHIFT
//             presents the four word groups to the QR, one per accepted beat.
// Output FSM: STATE_SHIFT writes four QR results into state_out, then
//             STATE_OUT raises o_valid until i_ready is seen.
//
// KEY_SIZE, COUNTER_SIZE, NONCE_SIZE, ROUNDS and CONSTANT are not used here.
module chacha20_pipelined_round #(
    parameter IS_EVEN = 1,
    parameter KEY_SIZE = 256,
    parameter COUNTER_SIZE = 64,
    parameter NONCE_SIZE = 64,
    parameter STATE_SIZE = 512,
    parameter ROUNDS = 20,
    parameter CONSTANT = 128'h657870616e642033322d62797465206b
)(
    input  logic                    i_clk,
    input  logic                    i_rst,

    input  logic [STATE_SIZE-1:0]   i_state,
    input  logic                    i_valid,
    output logic                    o_ready,

    output logic [STATE_SIZE-1:0]   o_state,
    output logic                    o_valid,
    input  logic                    i_ready
);

    // Latched input state and assembled output state, as 16 x 32-bit words.
    logic [31:0] state_in [16];
    logic [31:0] state_out [16];

    logic [31:0] qr_a_i, qr_a_o;
    logic [31:0] qr_b_i, qr_b_o;
    logic [31:0] qr_c_i, qr_c_o;
    logic [31:0] qr_d_i, qr_d_o;

    logic qr_i_valid, qr_o_valid;
    logic qr_o_ready, qr_i_ready;

    // The state encoding is shared between the in fsm and the out fsm.
    enum logic [1:0] {STATE_IN, STATE_SHIFT, STATE_OUT} in_fsm_state, out_fsm_state;

    // Which of the four quarter rounds is being issued / collected.
    logic [1:0] in_counter;
    logic [1:0] out_counter;

    // Select the four words fed to the quarter round.
    `define QR_IN(a, b, c, d)       \
        qr_a_i = state_in[a];       \
        qr_b_i = state_in[b];       \
        qr_c_i = state_in[c];       \
        qr_d_i = state_in[d]

    // Store the quarter round result back to the same four word positions.
    `define QR_OUT(a, b, c, d)      \
        state_out[a] <= qr_a_o;     \
        state_out[b] <= qr_b_o;     \
        state_out[c] <= qr_c_o;     \
        state_out[d] <= qr_d_o

    always_comb begin
        if (IS_EVEN) begin
            // Diagonal groups. These must match the QR_OUT groups below;
            // beats 1..3 previously selected the column groups by mistake.
            case (in_counter)
                0: begin
                    `QR_IN(0, 5, 10, 15);
                end

                1: begin
                    `QR_IN(1, 6, 11, 12);
                end

                2: begin
                    `QR_IN(2, 7, 8, 13);
                end

                3: begin
                    `QR_IN(3, 4, 9, 14);
                end
            endcase
        end else begin
            // Column groups.
            case (in_counter)
                0: begin
                    `QR_IN(0, 4, 8, 12);
                end

                1: begin
                    `QR_IN(1, 5, 9, 13);
                end

                2: begin
                    `QR_IN(2, 6, 10, 14);
                end

                3: begin
                    `QR_IN(3, 7, 11, 15);
                end
            endcase
        end

        // Repack the output words into the flat 512-bit bus, word 0 in bits [31:0].
        for (int i = 0; i < 16; i++) begin
            o_state[32*i +: 32] = state_out[i];
        end
    end

    chacha20_qr u_chacha20_qr (
        .i_clk      (i_clk),
        .i_rst      (i_rst),

        .i_valid    (qr_i_valid),
        .o_ready    (qr_o_ready),
        .a_i        (qr_a_i),
        .b_i        (qr_b_i),
        .c_i        (qr_c_i),
        .d_i        (qr_d_i),

        .o_valid    (qr_o_valid),
        .i_ready    (qr_i_ready),
        .a_o        (qr_a_o),
        .b_o        (qr_b_o),
        .c_o        (qr_c_o),
        .d_o        (qr_d_o)
    );

    assign o_ready    = in_fsm_state == STATE_IN;
    assign qr_i_valid = in_fsm_state == STATE_SHIFT;
    assign qr_i_ready = out_fsm_state == STATE_SHIFT;
    assign o_valid    = out_fsm_state == STATE_OUT;

    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            in_fsm_state <= STATE_IN;
            out_fsm_state <= STATE_SHIFT;

            in_counter <= '0;
            out_counter <= '0;
        end else begin
            case (in_fsm_state)
                STATE_IN: begin
                    if (i_valid) begin
                        // Register incoming state as unpacked 32 bit words.
                        // Non-blocking: state_in is read by the always_comb
                        // mux above, so a blocking write here would race.
                        for (int i = 0; i < 16; i++) begin
                            state_in[i] <= i_state[32*i +: 32];
                        end
                        in_counter <= '0;
                        in_fsm_state <= STATE_SHIFT;
                    end
                end

                STATE_SHIFT: begin
                    // Advance one group per beat accepted by the QR.
                    if (qr_o_ready) begin
                        in_counter <= in_counter + 1;
                        if (in_counter == 3) begin
                            in_fsm_state <= STATE_IN;
                        end
                    end
                end

                default: begin
                    in_fsm_state <= STATE_IN;
                end
            endcase

            case (out_fsm_state)
                STATE_SHIFT: begin
                    if (qr_o_valid) begin
                        if (IS_EVEN) begin
                            case (out_counter)
                                0: begin
                                    `QR_OUT(0, 5, 10, 15);
                                end

                                1: begin
                                    `QR_OUT(1, 6, 11, 12);
                                end

                                2: begin
                                    `QR_OUT(2, 7, 8, 13);
                                end

                                3: begin
                                    `QR_OUT(3, 4, 9, 14);
                                end
                            endcase
                        end else begin
                            case (out_counter)
                                0: begin
                                    `QR_OUT(0, 4, 8, 12);
                                end

                                1: begin
                                    `QR_OUT(1, 5, 9, 13);
                                end

                                2: begin
                                    `QR_OUT(2, 6, 10, 14);
                                end

                                3: begin
                                    `QR_OUT(3, 7, 11, 15);
                                end
                            endcase
                        end

                        // out_counter is 2 bits, so it wraps to 0 after the
                        // fourth result without an explicit clear.
                        out_counter <= out_counter + 1;
                        if (out_counter == 3) begin
                            out_fsm_state <= STATE_OUT;
                        end
                    end
                end

                STATE_OUT: begin
                    // Hold the assembled state until the consumer takes it.
                    if (i_ready) begin
                        out_fsm_state <= STATE_SHIFT;
                    end
                end

                default: begin
                    out_fsm_state <= STATE_SHIFT;
                end
            endcase
        end
    end

    // Keep the helper macros from leaking into later compilation units.
    `undef QR_IN
    `undef QR_OUT

endmodule

View File

@@ -1,2 +1,4 @@
chacha20_qr.sv
chacha20_block.sv
chacha20_block.sv
chacha20_pipelined_round.sv
chacha20_pipelined_block.sv