Enhance AXI4-Lite CPU Interface to support high performance pipelined transactions

This commit is contained in:
Alex Mykyta
2022-02-15 22:31:18 -08:00
parent d0ba488904
commit 0fa26f2030
11 changed files with 253 additions and 94 deletions

View File

@@ -51,4 +51,5 @@ optimally reduced.
A second optional read response retiming register can be enabled in-line with the A second optional read response retiming register can be enabled in-line with the
path back to the CPU interface layer. This can be useful if the CPU interface protocol path back to the CPU interface layer. This can be useful if the CPU interface protocol
used has a fully combinational response path, and needs to be retimed further. used has a fully combinational response path, and the design's complexity requires
this path to be retimed further.

11
docs/cpuif/axi4lite.rst Normal file
View File

@@ -0,0 +1,11 @@
AMBA AXI4-Lite
==============
TODO: Describe the following
* List of interface signals
* interface name & modports (link to advanced topics in case user wants to override)
* flattened equivalents
* Download link to SV interface definition

View File

@@ -47,6 +47,7 @@ Links
cpuif/addressing cpuif/addressing
cpuif/apb3 cpuif/apb3
cpuif/axi4lite
cpuif/advanced cpuif/advanced
cpuif/internal_protocol cpuif/internal_protocol

View File

@@ -87,7 +87,7 @@ class DecodeLogicGenerator(RDLForLoopGenerator):
def enter_Reg(self, node: RegNode) -> None: def enter_Reg(self, node: RegNode) -> None:
s = f"{self.addr_decode.get_access_strobe(node)} = cpuif_req & (cpuif_addr == {self._get_address_str(node)});" s = f"{self.addr_decode.get_access_strobe(node)} = cpuif_req_masked & (cpuif_addr == {self._get_address_str(node)});"
self.add_content(s) self.add_content(s)

View File

@@ -14,6 +14,28 @@ class AXI4Lite_Cpuif(CpuifBase):
def data_width_bytes(self) -> int: def data_width_bytes(self) -> int:
return self.data_width // 8 return self.data_width // 8
@property
def regblock_latency(self) -> int:
# Latency figure used to size this CPU interface: the larger of the
# exporter's minimum read latency and minimum write latency.
return max(self.exp.min_read_latency, self.exp.min_write_latency)
@property
def max_outstanding(self) -> int:
"""
Best pipelined performance is when the max outstanding transactions
is the design's latency + 2.
Anything beyond that does not have any effect, aside from adding unnecessary
logic and additional buffer-bloat latency.
"""
# "The design's latency" is regblock_latency, i.e. the larger of the
# exporter's minimum read and write latencies.
return self.regblock_latency + 2
@property
def resp_buffer_size(self) -> int:
"""
Response buffer size must be greater or equal to max outstanding
transactions to prevent response overrun.
"""
# Sized exactly equal to max_outstanding: the smallest depth that
# satisfies the requirement stated above.
return self.max_outstanding
class AXI4Lite_Cpuif_flattened(AXI4Lite_Cpuif): class AXI4Lite_Cpuif_flattened(AXI4Lite_Cpuif):
@property @property

View File

@@ -1,102 +1,216 @@
enum logic [1:0] { // LATENCY = {{cpuif.regblock_latency}}
CPUIF_IDLE, // MAX OUTSTANDING = {{cpuif.max_outstanding}}
CPUIF_BRESP, logic [{{clog2(cpuif.max_outstanding+1)-1}}:0] axil_n_in_flight;
CPUIF_RRESP logic axil_prev_was_rd;
} cpuif_state; logic axil_arvalid;
logic [{{cpuif.addr_width-1}}:0] axil_araddr;
logic cpuif_prev_was_rd; logic axil_ar_accept;
logic axil_awvalid;
logic [{{cpuif.addr_width-1}}:0] axil_awaddr;
logic axil_wvalid;
logic [{{cpuif.data_width-1}}:0] axil_wdata;
logic axil_aw_accept;
logic axil_resp_acked;
always_ff {{get_always_ff_event(cpuif.reset)}} begin always_ff {{get_always_ff_event(cpuif.reset)}} begin
if({{get_resetsignal(cpuif.reset)}}) begin if({{get_resetsignal(cpuif.reset)}}) begin
cpuif_state <= CPUIF_IDLE; axil_prev_was_rd <= '0;
cpuif_prev_was_rd <= '0; axil_arvalid <= '0;
axil_araddr <= '0;
axil_awvalid <= '0;
axil_awaddr <= '0;
axil_wvalid <= '0;
axil_wdata <= '0;
axil_n_in_flight <= '0;
end else begin
// AR* acceptance register
if(axil_ar_accept) begin
axil_prev_was_rd <= '1;
axil_arvalid <= '0;
end
if({{cpuif.signal("arvalid")}} && {{cpuif.signal("arready")}}) begin
axil_arvalid <= '1;
axil_araddr <= {{cpuif.signal("araddr")}};
end
cpuif_req <= '0; // AW* & W* acceptance registers
cpuif_req_is_wr <= '0; if(axil_aw_accept) begin
cpuif_addr <= '0; axil_prev_was_rd <= '0;
cpuif_wr_data <= '0; axil_awvalid <= '0;
axil_wvalid <= '0;
end
if({{cpuif.signal("awvalid")}} && {{cpuif.signal("awready")}}) begin
axil_awvalid <= '1;
axil_awaddr <= {{cpuif.signal("awaddr")}};
end
if({{cpuif.signal("wvalid")}} && {{cpuif.signal("wready")}}) begin
axil_wvalid <= '1;
axil_wdata <= {{cpuif.signal("wdata")}};
end
{{cpuif.signal("arready")}} <= '0; // Keep track of in-flight transactions
{{cpuif.signal("awready")}} <= '0; if((axil_ar_accept || axil_aw_accept) && !axil_resp_acked) begin
{{cpuif.signal("wready")}} <= '0; axil_n_in_flight <= axil_n_in_flight + 1'b1;
end else if(!(axil_ar_accept || axil_aw_accept) && axil_resp_acked) begin
axil_n_in_flight <= axil_n_in_flight - 1'b1;
end
end
end
// Channel ready generation.
// Each acceptance register can take a new transfer when it is currently empty
// (its valid flag is low) or when its contents are being accepted this cycle.
always_comb begin
{{cpuif.signal("arready")}} = (!axil_arvalid || axil_ar_accept);
{{cpuif.signal("awready")}} = (!axil_awvalid || axil_aw_accept);
// The W register is released by axil_aw_accept as well: the buffered write
// address and write data are consumed together as one request.
{{cpuif.signal("wready")}} = (!axil_wvalid || axil_aw_accept);
end
// Request dispatch
// Combinationally selects at most one buffered AXI transfer (read or write)
// and presents it on the internal cpuif_* request signals. A transfer is
// marked accepted (axil_ar_accept / axil_aw_accept) only when the matching
// stall signal is low; otherwise the request stays asserted.
always_comb begin
// Write data is driven from the W acceptance register unconditionally
cpuif_wr_data = axil_wdata;
// Defaults: no request issued, nothing accepted this cycle
cpuif_req = '0;
cpuif_req_is_wr = '0;
cpuif_addr = '0;
axil_ar_accept = '0;
axil_aw_accept = '0;
if(axil_n_in_flight < 'd{{cpuif.max_outstanding}}) begin
// Can safely issue more transactions without overwhelming response buffer
// Arbitration order:
//   1. a buffered read, if the previously accepted transfer was not a read
//   2. a buffered write, once both its address and data have been captured
//   3. a buffered read, when no complete write is available
if(axil_arvalid && !axil_prev_was_rd) begin
cpuif_req = '1;
cpuif_req_is_wr = '0;
cpuif_addr = axil_araddr;
if(!cpuif_req_stall_rd) axil_ar_accept = '1;
end else if(axil_awvalid && axil_wvalid) begin
cpuif_req = '1;
cpuif_req_is_wr = '1;
cpuif_addr = axil_awaddr;
if(!cpuif_req_stall_wr) axil_aw_accept = '1;
end else if(axil_arvalid) begin
cpuif_req = '1;
cpuif_req_is_wr = '0;
cpuif_addr = axil_araddr;
if(!cpuif_req_stall_rd) axil_ar_accept = '1;
end
end
end
// AXI4-Lite Response Logic
{%- if cpuif.resp_buffer_size == 1 %}
always_ff {{get_always_ff_event(cpuif.reset)}} begin
if({{get_resetsignal(cpuif.reset)}}) begin
{{cpuif.signal("rvalid")}} <= '0;
{{cpuif.signal("rresp")}} <= '0;
{{cpuif.signal("rdata")}} <= '0;
{{cpuif.signal("bvalid")}} <= '0; {{cpuif.signal("bvalid")}} <= '0;
{{cpuif.signal("bresp")}} <= '0; {{cpuif.signal("bresp")}} <= '0;
{{cpuif.signal("rvalid")}} <= '0;
{{cpuif.signal("rdata")}} <= '0;
{{cpuif.signal("rresp")}} <= '0;
end else begin end else begin
// Load response transfers as they arrive if({{cpuif.signal("rvalid")}} && {{cpuif.signal("rready")}}) begin
{{cpuif.signal("rvalid")}} <= '0;
end
if({{cpuif.signal("bvalid")}} && {{cpuif.signal("bready")}}) begin
{{cpuif.signal("bvalid")}} <= '0;
end
if(cpuif_rd_ack) begin if(cpuif_rd_ack) begin
{{cpuif.signal("rvalid")}} <= '1; {{cpuif.signal("rvalid")}} <= '1;
{{cpuif.signal("rdata")}} <= cpuif_rd_data; {{cpuif.signal("rdata")}} <= cpuif_rd_data;
if(cpuif_rd_err) {{cpuif.signal("rresp")}} <= 2'b10; // SLVERR if(cpuif_rd_err) {{cpuif.signal("rresp")}} <= 2'b10; // SLVERR
else {{cpuif.signal("rresp")}} <= 2'b00; // OKAY else {{cpuif.signal("rresp")}} <= 2'b00; // OKAY
end end
if(cpuif_wr_ack) begin if(cpuif_wr_ack) begin
{{cpuif.signal("bvalid")}} <= '1; {{cpuif.signal("bvalid")}} <= '1;
if(cpuif_wr_err) {{cpuif.signal("bresp")}} <= 2'b10; // SLVERR if(cpuif_wr_err) {{cpuif.signal("bresp")}} <= 2'b10; // SLVERR
else {{cpuif.signal("bresp")}} <= 2'b00; // OKAY else {{cpuif.signal("bresp")}} <= 2'b00; // OKAY
end end
// Transaction state machine
case(cpuif_state)
CPUIF_IDLE: begin
// round-robin arbitrate between read/write requests
// Allow read if previous transfer was not a read, or no write is active
if({{cpuif.signal("arvalid")}} && (!cpuif_prev_was_rd || !{{cpuif.signal("awvalid")}} || !{{cpuif.signal("wvalid")}})) begin
cpuif_req <= '1;
cpuif_req_is_wr <= '0;
{%- if cpuif.data_width == 8 %}
cpuif_addr <= {{cpuif.signal("araddr")}}[{{cpuif.addr_width-1}}:0];
{%- else %}
cpuif_addr <= { {{-cpuif.signal("araddr")}}[{{cpuif.addr_width-1}}:{{clog2(cpuif.data_width_bytes)}}], {{clog2(cpuif.data_width_bytes)}}'b0};
{%- endif %}
{{cpuif.signal("arready")}} <= '1;
cpuif_state <= CPUIF_RRESP;
end else if({{cpuif.signal("awvalid")}} && {{cpuif.signal("wvalid")}}) begin
{{cpuif.signal("awready")}} <= '1;
{{cpuif.signal("wready")}} <= '1;
if({{cpuif.signal("wstrb")}} != {{"%d'b" % cpuif.data_width_bytes}}{{"1" * cpuif.data_width_bytes}}) begin
// Unaligned writes or use of byte strobes is not supported yet
{{cpuif.signal("bvalid")}} <= '1;
{{cpuif.signal("bresp")}} <= 2'b10; // SLVERR
end else begin
cpuif_req <= '1;
cpuif_req_is_wr <= '1;
{%- if cpuif.data_width == 8 %}
cpuif_addr <= {{cpuif.signal("awaddr")}}[{{cpuif.addr_width-1}}:0];
{%- else %}
cpuif_addr <= { {{-cpuif.signal("awaddr")}}[{{cpuif.addr_width-1}}:{{clog2(cpuif.data_width_bytes)}}], {{clog2(cpuif.data_width_bytes)}}'b0};
{%- endif %}
cpuif_wr_data <= {{cpuif.signal("wdata")}};
end
cpuif_state <= CPUIF_BRESP;
end
end
CPUIF_BRESP: begin
cpuif_req <= '0;
{{cpuif.signal("awready")}} <= '0;
{{cpuif.signal("wready")}} <= '0;
cpuif_prev_was_rd <= '0;
if({{cpuif.signal("bvalid")}} && {{cpuif.signal("bready")}}) begin
{{cpuif.signal("bvalid")}} <= '0;
cpuif_state <= CPUIF_IDLE;
end
end
CPUIF_RRESP: begin
cpuif_req <= '0;
{{cpuif.signal("arready")}} <= '0;
cpuif_prev_was_rd <= '1;
if({{cpuif.signal("rvalid")}} && {{cpuif.signal("rready")}}) begin
{{cpuif.signal("rvalid")}} <= '0;
cpuif_state <= CPUIF_IDLE;
end
end
default: begin
cpuif_state <= CPUIF_IDLE;
end
endcase
end end
end end
// A response counts as acknowledged in any cycle where either response
// channel (R or B) completes a valid/ready handshake.
always_comb begin
axil_resp_acked = '0;
if({{cpuif.signal("rvalid")}} && {{cpuif.signal("rready")}}) axil_resp_acked = '1;
if({{cpuif.signal("bvalid")}} && {{cpuif.signal("bready")}}) axil_resp_acked = '1;
end
{%- else %}
// Response buffer: one entry per buffered read or write response.
struct {
logic is_wr; // 1: write response (B channel), 0: read response (R channel)
logic err; // error flag captured from cpuif_rd_err / cpuif_wr_err
logic [{{cpuif.data_width-1}}:0] rdata; // read data; only written for read responses
} axil_resp_buffer[{{cpuif.resp_buffer_size}}];
// Write/read pointers carry one bit more than the slice used to index the
// buffer, so that comparing the full pointers can tell empty apart from full.
logic [{{clog2(cpuif.resp_buffer_size)}}:0] axil_resp_wptr;
logic [{{clog2(cpuif.resp_buffer_size)}}:0] axil_resp_rptr;
// Response buffer write/read pointer management.
//
// Every acknowledged internal transfer (cpuif_rd_ack / cpuif_wr_ack) is stored
// at the write pointer; the read pointer advances whenever the AXI response at
// the head of the buffer is acknowledged (axil_resp_acked).
//
// All state in this block is updated with non-blocking assignments so that the
// buffer contents and the pointers change together in the same scheduling
// region. Using blocking assignments inside always_ff is flagged by lint tools
// and makes the buffer update order-dependent relative to the pointer update.
always_ff {{get_always_ff_event(cpuif.reset)}} begin
    if({{get_resetsignal(cpuif.reset)}}) begin
        for(int i=0; i<{{cpuif.resp_buffer_size}}; i++) begin
            axil_resp_buffer[i].is_wr <= '0;
            axil_resp_buffer[i].err <= '0;
            axil_resp_buffer[i].rdata <= '0;
        end
        axil_resp_wptr <= '0;
        axil_resp_rptr <= '0;
    end else begin
        // Store responses in buffer until AXI response channel accepts them
        if(cpuif_rd_ack || cpuif_wr_ack) begin
            if(cpuif_rd_ack) begin
                axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].is_wr <= '0;
                axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].err <= cpuif_rd_err;
                axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].rdata <= cpuif_rd_data;
            end else if(cpuif_wr_ack) begin
                // Write responses carry no data; rdata of this entry is left as-is
                axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].is_wr <= '1;
                axil_resp_buffer[axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].err <= cpuif_wr_err;
            end
            {%- if is_pow2(cpuif.resp_buffer_size) %}
            // Power-of-two depth: the pointer wraps naturally
            axil_resp_wptr <= axil_resp_wptr + 1'b1;
            {%- else %}
            // Non power-of-two depth: wrap the index manually and toggle the
            // extra MSB on each wrap
            if(axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] == {{cpuif.resp_buffer_size-1}}) begin
                axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= '0;
                axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)}}] <= ~axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)}}];
            end else begin
                axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= axil_resp_wptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] + 1'b1;
            end
            {%- endif %}
        end
        // Advance read pointer when acknowledged
        if(axil_resp_acked) begin
            {%- if is_pow2(cpuif.resp_buffer_size) %}
            axil_resp_rptr <= axil_resp_rptr + 1'b1;
            {%- else %}
            if(axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] == {{cpuif.resp_buffer_size-1}}) begin
                axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= '0;
                axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)}}] <= ~axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)}}];
            end else begin
                axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] <= axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0] + 1'b1;
            end
            {%- endif %}
        end
    end
end
// Response output stage.
// Presents the buffer entry at the read pointer on either the B or the R
// channel, depending on the entry's is_wr flag, and reports when that response
// has been accepted by the AXI side.
always_comb begin
axil_resp_acked = '0;
{{cpuif.signal("bvalid")}} = '0;
{{cpuif.signal("rvalid")}} = '0;
// Pointers differ => at least one response is waiting in the buffer
if(axil_resp_rptr != axil_resp_wptr) begin
if(axil_resp_buffer[axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].is_wr) begin
{{cpuif.signal("bvalid")}} = '1;
if({{cpuif.signal("bready")}}) axil_resp_acked = '1;
end else begin
{{cpuif.signal("rvalid")}} = '1;
if({{cpuif.signal("rready")}}) axil_resp_acked = '1;
end
end
// Data and response codes are driven from the head entry regardless of the
// valid signals above; both channels share the entry's single err flag.
{{cpuif.signal("rdata")}} = axil_resp_buffer[axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].rdata;
if(axil_resp_buffer[axil_resp_rptr[{{clog2(cpuif.resp_buffer_size)-1}}:0]].err) begin
{{cpuif.signal("bresp")}} = 2'b10; // SLVERR
{{cpuif.signal("rresp")}} = 2'b10; // SLVERR
end else begin
{{cpuif.signal("bresp")}} = 2'b00; // OKAY
{{cpuif.signal("rresp")}} = 2'b00; // OKAY
end
end
{%- endif %}

View File

@@ -1,6 +1,6 @@
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
from ..utils import get_always_ff_event, clog2 from ..utils import get_always_ff_event, clog2, is_pow2
if TYPE_CHECKING: if TYPE_CHECKING:
from ..exporter import RegblockExporter from ..exporter import RegblockExporter
@@ -25,6 +25,7 @@ class CpuifBase:
"get_always_ff_event": lambda resetsignal : get_always_ff_event(self.exp.dereferencer, resetsignal), "get_always_ff_event": lambda resetsignal : get_always_ff_event(self.exp.dereferencer, resetsignal),
"get_resetsignal": self.exp.dereferencer.get_resetsignal, "get_resetsignal": self.exp.dereferencer.get_resetsignal,
"clog2": clog2, "clog2": clog2,
"is_pow2": is_pow2,
} }
template = self.exp.jj_env.get_template(self.template_path) template = self.exp.jj_env.get_template(self.template_path)

View File

@@ -31,6 +31,8 @@ class RegblockExporter:
self.field_logic = FieldLogic(self) self.field_logic = FieldLogic(self)
self.readback = None # type: Readback self.readback = None # type: Readback
self.dereferencer = Dereferencer(self) self.dereferencer = Dereferencer(self)
self.min_read_latency = 0
self.min_write_latency = 0
if user_template_dir: if user_template_dir:
loader = jj.ChoiceLoader([ loader = jj.ChoiceLoader([
@@ -76,12 +78,12 @@ class RegblockExporter:
if kwargs: if kwargs:
raise TypeError("got an unexpected keyword argument '%s'" % list(kwargs.keys())[0]) raise TypeError("got an unexpected keyword argument '%s'" % list(kwargs.keys())[0])
min_read_latency = 0 self.min_read_latency = 0
min_write_latency = 0 self.min_write_latency = 0
if retime_read_fanin: if retime_read_fanin:
min_read_latency += 1 self.min_read_latency += 1
if retime_read_response: if retime_read_response:
min_read_latency += 1 self.min_read_latency += 1
# Scan the design for any unsupported features # Scan the design for any unsupported features
# Also collect pre-export information # Also collect pre-export information
@@ -120,8 +122,8 @@ class RegblockExporter:
"readback": self.readback, "readback": self.readback,
"get_always_ff_event": lambda resetsignal : get_always_ff_event(self.dereferencer, resetsignal), "get_always_ff_event": lambda resetsignal : get_always_ff_event(self.dereferencer, resetsignal),
"retime_read_response": retime_read_response, "retime_read_response": retime_read_response,
"min_read_latency": min_read_latency, "min_read_latency": self.min_read_latency,
"min_write_latency": min_write_latency, "min_write_latency": self.min_write_latency,
} }
# Write out design # Write out design

View File

@@ -36,10 +36,12 @@ module {{module_name}} (
{{cpuif.get_implementation()|indent}} {{cpuif.get_implementation()|indent}}
logic cpuif_req_masked;
{% if min_read_latency == min_write_latency %} {% if min_read_latency == min_write_latency %}
// Read & write latencies are balanced. Stalls not required // Read & write latencies are balanced. Stalls not required
assign cpuif_req_stall_rd = '0; assign cpuif_req_stall_rd = '0;
assign cpuif_req_stall_wr = '0; assign cpuif_req_stall_wr = '0;
assign cpuif_req_masked = cpuif_req;
{%- elif min_read_latency > min_write_latency %} {%- elif min_read_latency > min_write_latency %}
// Read latency > write latency. May need to delay next write that follows a read // Read latency > write latency. May need to delay next write that follows a read
logic [{{min_read_latency - min_write_latency - 1}}:0] cpuif_req_stall_sr; logic [{{min_read_latency - min_write_latency - 1}}:0] cpuif_req_stall_sr;
@@ -54,6 +56,7 @@ module {{module_name}} (
end end
assign cpuif_req_stall_rd = '0; assign cpuif_req_stall_rd = '0;
assign cpuif_req_stall_wr = cpuif_req_stall_sr[0]; assign cpuif_req_stall_wr = cpuif_req_stall_sr[0];
assign cpuif_req_masked = cpuif_req & !(cpuif_req_is_wr & cpuif_req_stall_wr);
{%- else %} {%- else %}
// Write latency > read latency. May need to delay next read that follows a write // Write latency > read latency. May need to delay next read that follows a write
logic [{{min_write_latency - min_read_latency - 1}}:0] cpuif_req_stall_sr; logic [{{min_write_latency - min_read_latency - 1}}:0] cpuif_req_stall_sr;
@@ -68,6 +71,7 @@ module {{module_name}} (
end end
assign cpuif_req_stall_rd = cpuif_req_stall_sr[0]; assign cpuif_req_stall_rd = cpuif_req_stall_sr[0];
assign cpuif_req_stall_wr = '0; assign cpuif_req_stall_wr = '0;
assign cpuif_req_masked = cpuif_req & !(!cpuif_req_is_wr & cpuif_req_stall_rd);
{%- endif %} {%- endif %}
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
@@ -84,7 +88,7 @@ module {{module_name}} (
end end
// Pass down signals to next stage // Pass down signals to next stage
assign decoded_req = cpuif_req; assign decoded_req = cpuif_req_masked;
assign decoded_req_is_wr = cpuif_req_is_wr; assign decoded_req_is_wr = cpuif_req_is_wr;
assign decoded_wr_data = cpuif_wr_data; assign decoded_wr_data = cpuif_wr_data;

View File

@@ -32,4 +32,7 @@ def get_always_ff_event(dereferencer: 'Dereferencer', resetsignal: 'Optional[Sig
return "@(posedge clk)" return "@(posedge clk)"
def clog2(n: int) -> int: def clog2(n: int) -> int:
return n.bit_length() - 1 return (n-1).bit_length()
def is_pow2(x: int) -> bool:
# True only for a positive x with exactly one bit set: x & (x - 1) clears the
# lowest set bit, so the result is zero precisely when no other bit is set.
return (x > 0) and ((x & (x - 1)) == 0)

View File

@@ -136,7 +136,7 @@ interface axi4lite_intf_driver #(
begin begin
txn_b_mutex.get(); txn_b_mutex.get();
@cb; @cb;
while(cb.BREADY !== 1'b1 && cb.BVALID !== 1'b1) @(cb); while(!(cb.BREADY === 1'b1 && cb.BVALID === 1'b1)) @(cb);
assert(!$isunknown(cb.BRESP)) else $error("Read from 0x%0x returned X's on BRESP", addr); assert(!$isunknown(cb.BRESP)) else $error("Read from 0x%0x returned X's on BRESP", addr);
txn_b_mutex.put(); txn_b_mutex.put();
end end