Enhance AXI4-Lite CPU Interface to support high performance pipelined transactions

This commit is contained in:
Alex Mykyta
2022-02-15 22:31:18 -08:00
parent d0ba488904
commit 0fa26f2030
11 changed files with 253 additions and 94 deletions

View File

@@ -87,7 +87,7 @@ class DecodeLogicGenerator(RDLForLoopGenerator):
def enter_Reg(self, node: RegNode) -> None:
s = f"{self.addr_decode.get_access_strobe(node)} = cpuif_req & (cpuif_addr == {self._get_address_str(node)});"
s = f"{self.addr_decode.get_access_strobe(node)} = cpuif_req_masked & (cpuif_addr == {self._get_address_str(node)});"
self.add_content(s)

View File

@@ -14,6 +14,28 @@ class AXI4Lite_Cpuif(CpuifBase):
def data_width_bytes(self) -> int:
return self.data_width // 8
@property
def regblock_latency(self) -> int:
return max(self.exp.min_read_latency, self.exp.min_write_latency)
@property
def max_outstanding(self) -> int:
"""
Best pipelined performance is when the max outstanding transactions
is the design's latency + 2.
Anything beyond that does not have any effect, aside from adding unnecessary
logic and additional buffer-bloat latency.
"""
return self.regblock_latency + 2
@property
def resp_buffer_size(self) -> int:
"""
Response buffer size must be greater or equal to max outstanding
transactions to prevent response overrun.
"""
return self.max_outstanding
class AXI4Lite_Cpuif_flattened(AXI4Lite_Cpuif):
@property

View File

@@ -1,102 +1,216 @@
enum logic [1:0] {
CPUIF_IDLE,
CPUIF_BRESP,
CPUIF_RRESP
} cpuif_state;
logic cpuif_prev_was_rd;
// LATENCY = {{cpuif.regblock_latency}}
// MAX OUTSTANDING = {{cpuif.max_outstanding}}
logic [{{clog2(cpuif.max_outstanding+1)-1}}:0] axil_n_in_flight;
logic axil_prev_was_rd;
logic axil_arvalid;
logic [{{cpuif.addr_width-1}}:0] axil_araddr;
logic axil_ar_accept;
logic axil_awvalid;
logic [{{cpuif.addr_width-1}}:0] axil_awaddr;
logic axil_wvalid;
logic [{{cpuif.data_width-1}}:0] axil_wdata;
logic axil_aw_accept;
logic axil_resp_acked;
always_ff {{get_always_ff_event(cpuif.reset)}} begin
if({{get_resetsignal(cpuif.reset)}}) begin
cpuif_state <= CPUIF_IDLE;
cpuif_prev_was_rd <= '0;
axil_prev_was_rd <= '0;
axil_arvalid <= '0;
axil_araddr <= '0;
axil_awvalid <= '0;
axil_awaddr <= '0;
axil_wvalid <= '0;
axil_wdata <= '0;
axil_n_in_flight <= '0;
end else begin
// AR* acceptance register
if(axil_ar_accept) begin
axil_prev_was_rd <= '1;
axil_arvalid <= '0;
end
if({{cpuif.signal("arvalid")}} && {{cpuif.signal("arready")}}) begin
axil_arvalid <= '1;
axil_araddr <= {{cpuif.signal("araddr")}};
end
cpuif_req <= '0;
cpuif_req_is_wr <= '0;
cpuif_addr <= '0;
cpuif_wr_data <= '0;
// AW* & W* acceptance registers
if(axil_aw_accept) begin
axil_prev_was_rd <= '0;
axil_awvalid <= '0;
axil_wvalid <= '0;
end
if({{cpuif.signal("awvalid")}} && {{cpuif.signal("awready")}}) begin
axil_awvalid <= '1;
axil_awaddr <= {{cpuif.signal("awaddr")}};
end
if({{cpuif.signal("wvalid")}} && {{cpuif.signal("wready")}}) begin
axil_wvalid <= '1;
axil_wdata <= {{cpuif.signal("wdata")}};
end
{{cpuif.signal("arready")}} <= '0;
{{cpuif.signal("awready")}} <= '0;
{{cpuif.signal("wready")}} <= '0;
// Keep track of in-flight transactions
if((axil_ar_accept || axil_aw_accept) && !axil_resp_acked) begin
axil_n_in_flight <= axil_n_in_flight + 1'b1;
end else if(!(axil_ar_accept || axil_aw_accept) && axil_resp_acked) begin
axil_n_in_flight <= axil_n_in_flight - 1'b1;
end
end
end
always_comb begin
{{cpuif.signal("arready")}} = (!axil_arvalid || axil_ar_accept);
{{cpuif.signal("awready")}} = (!axil_awvalid || axil_aw_accept);
{{cpuif.signal("wready")}} = (!axil_wvalid || axil_aw_accept);
end
// Request dispatch
always_comb begin
cpuif_wr_data = axil_wdata;
cpuif_req = '0;
cpuif_req_is_wr = '0;
cpuif_addr = '0;
axil_ar_accept = '0;
axil_aw_accept = '0;
if(axil_n_in_flight < 'd{{cpuif.max_outstanding}}) begin
// Can safely issue more transactions without overwhelming response buffer
if(axil_arvalid && !axil_prev_was_rd) begin
cpuif_req = '1;
cpuif_req_is_wr = '0;
cpuif_addr = axil_araddr;
if(!cpuif_req_stall_rd) axil_ar_accept = '1;
end else if(axil_awvalid && axil_wvalid) begin
cpuif_req = '1;
cpuif_req_is_wr = '1;
cpuif_addr = axil_awaddr;
if(!cpuif_req_stall_wr) axil_aw_accept = '1;
end else if(axil_arvalid) begin
cpuif_req = '1;
cpuif_req_is_wr = '0;
cpuif_addr = axil_araddr;
if(!cpuif_req_stall_rd) axil_ar_accept = '1;
end
end
end
// AXI4-Lite Response Logic
{%- if cpuif.resp_buffer_size == 1 %}
always_ff {{get_always_ff_event(cpuif.reset)}} begin
if({{get_resetsignal(cpuif.reset)}}) begin
{{cpuif.signal("rvalid")}} <= '0;
{{cpuif.signal("rresp")}} <= '0;
{{cpuif.signal("rdata")}} <= '0;
{{cpuif.signal("bvalid")}} <= '0;
{{cpuif.signal("bresp")}} <= '0;
{{cpuif.signal("rvalid")}} <= '0;
{{cpuif.signal("rdata")}} <= '0;
{{cpuif.signal("rresp")}} <= '0;
end else begin
// Load response transfers as they arrive
if({{cpuif.signal("rvalid")}} && {{cpuif.signal("rready")}}) begin
{{cpuif.signal("rvalid")}} <= '0;
end
if({{cpuif.signal("bvalid")}} && {{cpuif.signal("bready")}}) begin
{{cpuif.signal("bvalid")}} <= '0;
end
if(cpuif_rd_ack) begin
{{cpuif.signal("rvalid")}} <= '1;
{{cpuif.signal("rdata")}} <= cpuif_rd_data;
if(cpuif_rd_err) {{cpuif.signal("rresp")}} <= 2'b10; // SLVERR
else {{cpuif.signal("rresp")}} <= 2'b00; // OKAY
end
if(cpuif_wr_ack) begin
{{cpuif.signal("bvalid")}} <= '1;
if(cpuif_wr_err) {{cpuif.signal("bresp")}} <= 2'b10; // SLVERR
else {{cpuif.signal("bresp")}} <= 2'b00; // OKAY
end
// Transaction state machine
case(cpuif_state)
CPUIF_IDLE: begin
// round-robin arbitrate between read/write requests
// Allow read if previous transfer was not a read, or no write is active
if({{cpuif.signal("arvalid")}} && (!cpuif_prev_was_rd || !{{cpuif.signal("awvalid")}} || !{{cpuif.signal("wvalid")}})) begin
cpuif_req <= '1;
cpuif_req_is_wr <= '0;
{%- if cpuif.data_width == 8 %}
cpuif_addr <= {{cpuif.signal("araddr")}}[{{cpuif.addr_width-1}}:0];
{%- else %}
cpuif_addr <= { {{-cpuif.signal("araddr")}}[{{cpuif.addr_width-1}}:{{clog2(cpuif.data_width_bytes)}}], {{clog2(cpuif.data_width_bytes)}}'b0};
{%- endif %}
{{cpuif.signal("arready")}} <= '1;
cpuif_state <= CPUIF_RRESP;
end else if({{cpuif.signal("awvalid")}} && {{cpuif.signal("wvalid")}}) begin
{{cpuif.signal("awready")}} <= '1;
{{cpuif.signal("wready")}} <= '1;
if({{cpuif.signal("wstrb")}} != {{"%d'b" % cpuif.data_width_bytes}}{{"1" * cpuif.data_width_bytes}}) begin
// Unaligned writes or use of byte strobes is not supported yet
{{cpuif.signal("bvalid")}} <= '1;
{{cpuif.signal("bresp")}} <= 2'b10; // SLVERR
end else begin
cpuif_req <= '1;
cpuif_req_is_wr <= '1;
{%- if cpuif.data_width == 8 %}
cpuif_addr <= {{cpuif.signal("awaddr")}}[{{cpuif.addr_width-1}}:0];
{%- else %}
cpuif_addr <= { {{-cpuif.signal("awaddr")}}[{{cpuif.addr_width-1}}:{{clog2(cpuif.data_width_bytes)}}], {{clog2(cpuif.data_width_bytes)}}'b0};
{%- endif %}
cpuif_wr_data <= {{cpuif.signal("wdata")}};
end
cpuif_state <= CPUIF_BRESP;
end
end
CPUIF_BRESP: begin
cpuif_req <= '0;
{{cpuif.signal("awready")}} <= '0;
{{cpuif.signal("wready")}} <= '0;
cpuif_prev_was_rd <= '0;
if({{cpuif.signal("bvalid")}} && {{cpuif.signal("bready")}}) begin
{{cpuif.signal("bvalid")}} <= '0;
cpuif_state <= CPUIF_IDLE;
end
end
CPUIF_RRESP: begin
cpuif_req <= '0;
{{cpuif.signal("arready")}} <= '0;
cpuif_prev_was_rd <= '1;
if({{cpuif.signal("rvalid")}} && {{cpuif.signal("rready")}}) begin
{{cpuif.signal("rvalid")}} <= '0;
cpuif_state <= CPUIF_IDLE;
end
end
default: begin
cpuif_state <= CPUIF_IDLE;
end
endcase
end
end
always_comb begin
axil_resp_acked = '0;
if({{cpuif.signal("rvalid")}} && {{cpuif.signal("rready")}}) axil_resp_acked = '1;
if({{cpuif.signal("bvalid")}} && {{cpuif.signal("bready")}}) axil_resp_acked = '1;
end
{%- else %}
struct {
logic is_wr;
logic err;
logic [{{cpuif.data_width-1}}:0] rdata;
} axil_resp_buffer[{{cpuif.resp_buffer_size}}];
logic [{{clog2(cpuif.resp_buffer_size)}}:0] axil_resp_wptr;
logic [{{clog2(cpuif.resp_buffer_size)}}:0] axil_resp_rptr;
always_ff {{get_always_ff_event(cpuif.reset)}} begin
if({{get_resetsignal(cpuif.reset)}}) begin
for(int i=0; i<{{cpuif.resp_buffer_size}}; i++) begin
axil_resp_buffer[i].is_wr = '0;
axil_resp_buffer[i].err = '0;
axil_resp_buffer[i].rdata = '0;
end
axil_resp_wptr <= '0;
axil_resp_rptr <= '0;
end else begin
// Store responses in buffer until AXI response channel accepts them
if(cpuif_rd_ack || cpuif_wr_ack) begin
if(cpuif_rd_ack) begin
axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].is_wr = '0;
axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].err = cpuif_rd_err;
axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].rdata = cpuif_rd_data;
end else if(cpuif_wr_ack) begin
axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].is_wr = '1;
axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].err = cpuif_wr_err;
end
{%- if is_pow2(cpuif.resp_buffer_size) %}
axil_resp_wptr <= axil_resp_wptr + 1'b1;
{%- else %}
if(axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] == {{cpuif.resp_buffer_size-1}}) begin
axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= '0;
axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)}}] <= ~axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)}}];
end else begin
axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] + 1'b1;
end
{%- endif %}
end
// Advance read pointer when acknowledged
if(axil_resp_acked) begin
{%- if is_pow2(cpuif.resp_buffer_size) %}
axil_resp_rptr <= axil_resp_rptr + 1'b1;
{%- else %}
if(axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] == {{cpuif.resp_buffer_size-1}}) begin
axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= '0;
axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)}}] <= ~axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)}}];
end else begin
axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] + 1'b1;
end
{%- endif %}
end
end
end
always_comb begin
axil_resp_acked = '0;
{{cpuif.signal("bvalid")}} = '0;
{{cpuif.signal("rvalid")}} = '0;
if(axil_resp_rptr != axil_resp_wptr) begin
if(axil_resp_buffer[axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].is_wr) begin
{{cpuif.signal("bvalid")}} = '1;
if({{cpuif.signal("bready")}}) axil_resp_acked = '1;
end else begin
{{cpuif.signal("rvalid")}} = '1;
if({{cpuif.signal("rready")}}) axil_resp_acked = '1;
end
end
{{cpuif.signal("rdata")}} = axil_resp_buffer[axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].rdata;
if(axil_resp_buffer[axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].err) begin
{{cpuif.signal("bresp")}} = 2'b10;
{{cpuif.signal("rresp")}} = 2'b10;
end else begin
{{cpuif.signal("bresp")}} = 2'b00;
{{cpuif.signal("rresp")}} = 2'b00;
end
end
{%- endif %}

View File

@@ -1,6 +1,6 @@
from typing import TYPE_CHECKING, Optional
from ..utils import get_always_ff_event, clog2
from ..utils import get_always_ff_event, clog2, is_pow2
if TYPE_CHECKING:
from ..exporter import RegblockExporter
@@ -25,6 +25,7 @@ class CpuifBase:
"get_always_ff_event": lambda resetsignal : get_always_ff_event(self.exp.dereferencer, resetsignal),
"get_resetsignal": self.exp.dereferencer.get_resetsignal,
"clog2": clog2,
"is_pow2": is_pow2,
}
template = self.exp.jj_env.get_template(self.template_path)

View File

@@ -31,6 +31,8 @@ class RegblockExporter:
self.field_logic = FieldLogic(self)
self.readback = None # type: Readback
self.dereferencer = Dereferencer(self)
self.min_read_latency = 0
self.min_write_latency = 0
if user_template_dir:
loader = jj.ChoiceLoader([
@@ -76,12 +78,12 @@ class RegblockExporter:
if kwargs:
raise TypeError("got an unexpected keyword argument '%s'" % list(kwargs.keys())[0])
min_read_latency = 0
min_write_latency = 0
self.min_read_latency = 0
self.min_write_latency = 0
if retime_read_fanin:
min_read_latency += 1
self.min_read_latency += 1
if retime_read_response:
min_read_latency += 1
self.min_read_latency += 1
# Scan the design for any unsupported features
# Also collect pre-export information
@@ -120,8 +122,8 @@ class RegblockExporter:
"readback": self.readback,
"get_always_ff_event": lambda resetsignal : get_always_ff_event(self.dereferencer, resetsignal),
"retime_read_response": retime_read_response,
"min_read_latency": min_read_latency,
"min_write_latency": min_write_latency,
"min_read_latency": self.min_read_latency,
"min_write_latency": self.min_write_latency,
}
# Write out design

View File

@@ -36,10 +36,12 @@ module {{module_name}} (
{{cpuif.get_implementation()|indent}}
logic cpuif_req_masked;
{% if min_read_latency == min_write_latency %}
// Read & write latencies are balanced. Stalls not required
assign cpuif_req_stall_rd = '0;
assign cpuif_req_stall_wr = '0;
assign cpuif_req_masked = cpuif_req;
{%- elif min_read_latency > min_write_latency %}
// Read latency > write latency. May need to delay next write that follows a read
logic [{{min_read_latency - min_write_latency - 1}}:0] cpuif_req_stall_sr;
@@ -54,6 +56,7 @@ module {{module_name}} (
end
assign cpuif_req_stall_rd = '0;
assign cpuif_req_stall_wr = cpuif_req_stall_sr[0];
assign cpuif_req_masked = cpuif_req & !(cpuif_req_is_wr & cpuif_req_stall_wr);
{%- else %}
// Write latency > read latency. May need to delay next read that follows a write
logic [{{min_write_latency - min_read_latency - 1}}:0] cpuif_req_stall_sr;
@@ -68,6 +71,7 @@ module {{module_name}} (
end
assign cpuif_req_stall_rd = cpuif_req_stall_sr[0];
assign cpuif_req_stall_wr = '0;
assign cpuif_req_masked = cpuif_req & !(!cpuif_req_is_wr & cpuif_req_stall_rd);
{%- endif %}
//--------------------------------------------------------------------------
@@ -84,7 +88,7 @@ module {{module_name}} (
end
// Pass down signals to next stage
assign decoded_req = cpuif_req;
assign decoded_req = cpuif_req_masked;
assign decoded_req_is_wr = cpuif_req_is_wr;
assign decoded_wr_data = cpuif_wr_data;

View File

@@ -32,4 +32,7 @@ def get_always_ff_event(dereferencer: 'Dereferencer', resetsignal: 'Optional[Sig
return "@(posedge clk)"
def clog2(n: int) -> int:
return n.bit_length() - 1
return (n-1).bit_length()
def is_pow2(x: int) -> bool:
return (x > 0) and ((x & (x - 1)) == 0)