Switch to use regular non-namespaced package
This commit is contained in:
69
src/peakrdl_regblock/readback/__init__.py
Normal file
69
src/peakrdl_regblock/readback/__init__.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from typing import TYPE_CHECKING
|
||||
import math
|
||||
|
||||
from .generators import ReadbackAssignmentGenerator
|
||||
from ..utils import get_always_ff_event
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..exporter import RegblockExporter
|
||||
from systemrdl.node import AddrmapNode
|
||||
|
||||
class Readback:
    """
    Renders the readback section of the generated register block.

    The per-register readback assignments are produced by
    ``ReadbackAssignmentGenerator``; this class packages them, together with
    the parameters of the optional fanin pipeline stage, into the context of
    the ``readback/templates/readback.sv`` template.
    """

    # Readback arrays with fewer entries than this never get a fanin stage
    _MIN_FANIN_ARRAY_SIZE = 4

    def __init__(self, exp:'RegblockExporter', do_fanin_stage: bool):
        """
        Parameters
        ----------
        exp
            Exporter that owns this object. Its ``top_node``,
            ``dereferencer``, ``cpuif`` and ``jj_env`` attributes are used.
        do_fanin_stage
            Request an extra pipeline stage in the readback fanin path.
            May be overridden to False by :meth:`get_implementation` when the
            readback array is too small.
        """
        self.exp = exp
        self.do_fanin_stage = do_fanin_stage

    @property
    def top_node(self) -> 'AddrmapNode':
        """Top-level node of the exporter this object belongs to."""
        return self.exp.top_node

    @staticmethod
    def _get_fanin_context(array_size: int) -> dict:
        """
        Compute the template parameters of the fanin pipeline stage.

        The goal is to split the fanin path roughly in the middle, so that
        the fanin into the pipeline stage and the fanin after it are
        balanced. Groups of ``fanin_stride`` (about ``sqrt(array_size)``)
        readback entries are reduced into one intermediate element each.

        Returns a dict with the keys:

        - ``fanin_stride``: number of readback entries consumed per
          intermediate element
        - ``fanin_array_size``: number of intermediate elements, including
          the extra one that holds any leftover entries
        - ``fanin_residual_stride``: number of leftover entries that do not
          fill a complete group (0 if the groups divide evenly)
        - ``fanin_loop_iter``: number of complete groups, i.e. iterations of
          the bulk fanin loop

        Raises ValueError if ``array_size`` is less than 1, since no group
        size can be derived from an empty array.
        """
        if array_size < 1:
            raise ValueError("array_size must be at least 1")

        # Size of fanin group to consume per fanin element
        fanin_stride = math.floor(math.sqrt(array_size))

        # Complete groups are handled by the bulk loop; leftovers (if any)
        # are handled in one extra array element.
        # Exact integer arithmetic avoids any float rounding.
        fanin_loop_iter, fanin_residual_stride = divmod(array_size, fanin_stride)
        fanin_array_size = fanin_loop_iter
        if fanin_residual_stride != 0:
            fanin_array_size += 1

        return {
            'fanin_stride': fanin_stride,
            'fanin_array_size': fanin_array_size,
            'fanin_residual_stride': fanin_residual_stride,
            'fanin_loop_iter': fanin_loop_iter,
        }

    def get_implementation(self) -> str:
        """
        Render the readback template and return the resulting text.

        Side effect: ``self.do_fanin_stage`` is forced to False when the
        readback array has fewer than ``_MIN_FANIN_ARRAY_SIZE`` entries.
        """
        gen = ReadbackAssignmentGenerator(self.exp)
        # The template handles the case where this is None
        array_assignments = gen.get_content(self.top_node)
        array_size = gen.current_offset

        # Enabling the fanin stage doesn't make sense if the readback fanin
        # is small. This also avoids pesky corner cases
        if array_size < self._MIN_FANIN_ARRAY_SIZE:
            self.do_fanin_stage = False

        context = {
            "array_assignments" : array_assignments,
            "array_size" : array_size,
            "get_always_ff_event": lambda resetsignal : get_always_ff_event(self.exp.dereferencer, resetsignal),
            "cpuif": self.exp.cpuif,
            "do_fanin_stage": self.do_fanin_stage,
        }

        if self.do_fanin_stage:
            context.update(self._get_fanin_context(array_size))

        template = self.exp.jj_env.get_template(
            "readback/templates/readback.sv"
        )
        return template.render(context)
|
||||
107
src/peakrdl_regblock/readback/generators.py
Normal file
107
src/peakrdl_regblock/readback/generators.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from ..forloop_generator import RDLForLoopGenerator, LoopBody
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..exporter import RegblockExporter
|
||||
from systemrdl.node import RegNode
|
||||
|
||||
class ReadbackLoopBody(LoopBody):
    """
    Loop body that carries the number of readback array entries taken up by
    a single iteration of the loop.

    When the body is converted to text, every ``$<iterator>sz`` placeholder
    in it is substituted with that number.
    """

    def __init__(self, dim: int, iterator: str, i_type: str) -> None:
        super().__init__(dim, iterator, i_type)
        # Entries per loop iteration; starts at 0 and is expected to be
        # assigned before the body is stringified
        self.n_regs = 0

    def __str__(self) -> str:
        # Resolve the per-iteration size placeholder of this loop's iterator
        size_placeholder = "${}sz".format(self.iterator)
        rendered = super().__str__()
        return rendered.replace(size_placeholder, str(self.n_regs))
|
||||
|
||||
class ReadbackAssignmentGenerator(RDLForLoopGenerator):
    """
    Generates the ``assign readback_array[...]`` statements for every
    software-readable register, tracking which readback array entry each
    register lands in.
    """

    i_type = "genvar"
    loop_body_cls = ReadbackLoopBody

    def __init__(self, exp:'RegblockExporter') -> None:
        super().__init__()
        self.exp = exp

        # All possible readback values are gathered into one flat array whose
        # element width equals the CPUIF bus width. Each element stands for
        # one aligned read access.
        self.current_offset = 0
        # Value of current_offset at the point each open loop was entered
        self.start_offset_stack = [] # type: List[int]
        # Dimension of each open loop
        self.dim_stack = [] # type: List[int]

    @property
    def current_offset_str(self) -> str:
        """
        Build the index expression of the readback array entry that is
        currently being assigned.

        The expression is the sum of:
        - one ``i<N>*$i<N>sz`` term per enclosing loop (loop index times the
          size of that loop's body)
        - the static integer ``current_offset``, which only ever grows as
          more register assignments are processed

        A loop's body size is unknown while the body is still being
        generated, so it is written as a placeholder token ($i0sz, $i1sz,
        $i2sz, ...). The tokens are substituted once the loop body is
        complete and the size of its contents is known.
        """
        loop_terms = [
            f"i{level}*$i{level}sz"
            for level in range(self._loop_level)
        ]
        return " + ".join(loop_terms + [str(self.current_offset)])

    def enter_Reg(self, node: 'RegNode') -> None:
        """
        Emit the readback assignments for one register and claim one
        readback array entry for it.

        Registers whose ``has_sw_readable`` is false emit nothing and do not
        consume an entry.
        """
        # TODO: account for smaller regs that are not aligned to the bus width
        #   - offset the field bit slice as appropriate
        #   - do not always increment the current offset
        if not node.has_sw_readable:
            return

        def emit(high: int, low: int, rhs: str) -> None:
            # One assignment to bits [high:low] of the current array entry
            self.add_content(f"assign readback_array[{self.current_offset_str}][{high}:{low}] = {rhs};")

        deref = self.exp.dereferencer
        rd_strb = f"({deref.get_access_strobe(node)} && !decoded_req_is_wr)"

        # Lowest bit that has not been assigned yet
        next_bit = 0

        # Fields are sorted by ascending low bit
        for field in node.fields():
            if not field.is_sw_readable:
                continue

            # Zero-fill any gap below this field
            if field.low != next_bit:
                emit(field.low - 1, next_bit, "'0")

            value = deref.get_value(field)
            if field.msb < field.lsb:
                # Field gets bitswapped since it is in [low:high] orientation
                value = f"{{<<{{{value}}}}}"

            emit(field.high, field.low, f"{rd_strb} ? {value} : '0")
            next_bit = field.high + 1

        # Zero-fill whatever remains up to the bus width
        bus_width = self.exp.cpuif.data_width
        if next_bit < bus_width:
            emit(bus_width - 1, next_bit, "'0")

        self.current_offset += 1

    def push_loop(self, dim: int) -> None:
        """Open a loop, remembering its dimension and starting offset."""
        super().push_loop(dim)
        self.start_offset_stack.append(self.current_offset)
        self.dim_stack.append(dim)

    def pop_loop(self) -> None:
        """
        Close the innermost loop: record how many entries one iteration
        occupies, then advance the offset past all iterations.
        """
        loop_start = self.start_offset_stack.pop()
        loop_dim = self.dim_stack.pop()

        # Number of registers enclosed in one iteration of this loop
        regs_per_iter = self.current_offset - loop_start
        self.current_loop.n_regs = regs_per_iter # type: ignore

        super().pop_loop()

        # The enclosing scope continues after every iteration of the loop
        self.current_offset = loop_start + regs_per_iter * loop_dim
|
||||
68
src/peakrdl_regblock/readback/templates/readback.sv
Normal file
68
src/peakrdl_regblock/readback/templates/readback.sv
Normal file
@@ -0,0 +1,68 @@
|
||||
{% if array_assignments is not none %}
{#- Readback path when at least one readback assignment exists.
    readback_array holds array_size entries, each cpuif.data_width bits
    wide. All entries are OR-reduced into readback_data, either directly or
    through an intermediate registered fanin stage (do_fanin_stage). #}
// Assign readback values to a flattened array
logic [{{cpuif.data_width-1}}:0] readback_array[{{array_size}}];
{{array_assignments}}

{% if do_fanin_stage %}
{#- Two-level reduction: groups of fanin_stride entries are OR-ed into
    readback_array_c, registered into readback_array_r, then OR-ed into
    readback_data. readback_done is delayed through readback_done_r.
    NOTE(review): the register stage below is hard-wired to
    "posedge clk" / "rst" although the render context also supplies
    get_always_ff_event - confirm this is intended. #}
// fanin stage
logic [{{cpuif.data_width-1}}:0] readback_array_c[{{fanin_array_size}}];
for(genvar g=0; g<{{fanin_loop_iter}}; g++) begin
    always_comb begin
        automatic logic [{{cpuif.data_width-1}}:0] readback_data_var;
        readback_data_var = '0;
        for(int i=g*{{fanin_stride}}; i<((g+1)*{{fanin_stride}}); i++) readback_data_var |= readback_array[i];
        readback_array_c[g] = readback_data_var;
    end
end
{%- if fanin_residual_stride == 1 %}
{#- Exactly one leftover entry: copy it straight into the last element #}
assign readback_array_c[{{fanin_array_size-1}}] = readback_array[{{array_size-1}}];
{%- elif fanin_residual_stride > 1 %}
{#- Several leftover entries: OR them together into the last element #}
always_comb begin
    automatic logic [{{cpuif.data_width-1}}:0] readback_data_var;
    readback_data_var = '0;
    for(int i={{(fanin_array_size-1) * fanin_stride}}; i<{{array_size}}; i++) readback_data_var |= readback_array[i];
    readback_array_c[{{fanin_array_size-1}}] = readback_data_var;
end
{%- endif %}

logic [{{cpuif.data_width-1}}:0] readback_array_r[{{fanin_array_size}}];
logic readback_done_r;
always_ff @(posedge clk) begin
    if(rst) begin
        for(int i=0; i<{{fanin_array_size}}; i++) readback_array_r[i] <= '0;
        readback_done_r <= '0;
    end else begin
        readback_array_r <= readback_array_c;
        readback_done_r <= decoded_req & ~decoded_req_is_wr;
    end
end

// Reduce the array
always_comb begin
    automatic logic [{{cpuif.data_width-1}}:0] readback_data_var;
    readback_done = readback_done_r;
    readback_err = '0;
    readback_data_var = '0;
    for(int i=0; i<{{fanin_array_size}}; i++) readback_data_var |= readback_array_r[i];
    readback_data = readback_data_var;
end

{%- else %}
{#- No fanin stage: a single combinational OR-reduction of the whole array #}
// Reduce the array
always_comb begin
    automatic logic [{{cpuif.data_width-1}}:0] readback_data_var;
    readback_done = decoded_req & ~decoded_req_is_wr;
    readback_err = '0;
    readback_data_var = '0;
    for(int i=0; i<{{array_size}}; i++) readback_data_var |= readback_array[i];
    readback_data = readback_data_var;
end
{%- endif %}



{%- else %}
{#- No readback assignments at all: readback_data and readback_err are tied
    to zero and readback_done follows the decoded read request #}
assign readback_done = decoded_req & ~decoded_req_is_wr;
assign readback_data = '0;
assign readback_err = '0;
{% endif %}
|
||||
Reference in New Issue
Block a user