Get it roughly working

This commit is contained in:
2026-05-24 15:53:50 -07:00
parent aa8c4a64df
commit 61ee654b18
5 changed files with 644 additions and 59 deletions

View File

@@ -0,0 +1,299 @@
import cocotb
from cocotb.handle import LogicArray, Array, Immediate
from cocotb.clock import Clock
from cocotb.triggers import ReadOnly, NextTimeStep, RisingEdge, Timer
import logging
import random
from enum import IntEnum
# Testbench logging goes through the root logger, with its level set to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Clock period passed to cocotb's Clock (the tests pass unit="ns").
CLK_PERIOD = 5

# Cache geometry assumed by the behavioural array models in this file.
SETS = 64
WAYS = 4
TAG_WIDTH = 20

# Backing storage for the modelled arrays.
# data_arrays / meta_arrays hold one dict per way, keyed by set index;
# lru_array is a single dict keyed by set index.
data_arrays = [dict() for _ in range(WAYS)]
meta_arrays = [dict() for _ in range(WAYS)]
lru_array = dict()
class MesiState(IntEnum):
    """MESI coherence state as stored in the upper bits of a metadata word.

    The numeric values are shifted above the tag by ``write_cacheline`` and are
    intended to match the encoding of the RTL's ``mesi_e`` enum.
    """

    # Plain ints: the previous trailing commas turned these into one-element
    # tuples that only became ints through Enum's tuple unpacking.
    MESI_INVALID = 0
    MESI_SHARED = 1
    MESI_EXCLUSIVE = 2
    MESI_MODIFIED = 3
def write_cacheline(index: int, way: int, data: bytes, mesi_state: MesiState, tag: int):
    """Preload one line of the behavioural cache model.

    Args:
        index: Set index used as the key in the per-way dicts.
        way: Which way's data/metadata dict to write.
        data: Line contents stored verbatim in ``data_arrays``.
        mesi_state: Coherence state placed above the tag in the metadata word.
        tag: Tag placed in the low bits of the metadata word (not masked here).
    """
    data_arrays[way][index] = data
    # Metadata word is {mesi_state, tag}; the state sits directly above the
    # TAG_WIDTH-bit tag field.
    meta_arrays[way][index] = (mesi_state << TAG_WIDTH) | tag
async def handle_cache_arrays(dut):
    """Model the cache data and metadata arrays behind the DUT's array ports.

    Runs forever. On every rising edge of ``dut.i_clk`` it:

    * stores ``o_write_data`` / ``o_write_meta`` at ``o_write_index`` in every
      way whose bit is set in ``o_write_valid``;
    * when ``o_read_valid`` is set, drives ``i_read_data`` / ``i_read_meta``
      with the per-way contents stored at ``o_read_index``.

    A read of a set that was never written raises ``KeyError``.
    NOTE(review): the RTL's CLEAR_MEMORY state appears to write every set
    after reset, which would populate the model before the first read.
    """
    while True:
        await RisingEdge(dut.i_clk)

        if dut.o_write_valid.value:
            index = int(dut.o_write_index.value)
            # o_write_valid is treated as a per-way mask: bit i enables way i.
            # Sample it once and size the decode from WAYS, not a literal 4.
            way_mask = int(dut.o_write_valid.value)
            write_enables = [bool(way_mask & (1 << way)) for way in range(WAYS)]
            write_data = dut.o_write_data.value.to_bytes(byteorder="little")
            write_meta = int(dut.o_write_meta.value)
            logger.debug(f"Write Valid: {index=} {write_enables=} {write_data=} {write_meta=:#x}")
            for data_array, meta_array, write_enable in zip(data_arrays, meta_arrays, write_enables):
                if write_enable:
                    data_array[index] = write_data
                    meta_array[index] = write_meta

        if dut.o_read_valid.value:
            index = int(dut.o_read_index.value)
            logger.debug(f"Read Valid: {index=}")
            # One element per way, in way order.
            read_data = [LogicArray.from_bytes(data[index], byteorder="little") for data in data_arrays]
            read_meta = [meta[index] for meta in meta_arrays]
            dut.i_read_data.value = read_data
            dut.i_read_meta.value = read_meta
async def handle_lru_arrays(dut):
    """Model the LRU state array behind the DUT's LRU read/write ports.

    Runs forever. On each rising edge of ``dut.i_clk`` a pending LRU write is
    stored in ``lru_array`` first, then a pending LRU read is answered on
    ``i_lru_read_data``. Reading a set that was never written raises
    ``KeyError``.
    """
    while True:
        await RisingEdge(dut.i_clk)

        if dut.o_lru_write_valid.value:
            logger.debug("lru write")
            target_set = int(dut.o_lru_write_index.value)
            new_bits = int(dut.o_lru_write_data.value)
            lru_array[target_set] = new_bits

        if dut.o_lru_read_valid.value:
            logger.debug("lru read")
            source_set = int(dut.o_lru_read_index.value)
            dut.i_lru_read_data.value = lru_array[source_set]
async def handle_writeback(dut):
    """Acknowledge writeback requests from the DUT with a fixed latency.

    Drives ``i_writeback_done`` low, then loops forever: when
    ``o_writeback_valid`` is seen at a clock edge, waits two more edges,
    pulses ``i_writeback_done`` high for one clock and returns to waiting.
    The writeback data and address are not inspected.
    """
    dut.i_writeback_done.value = 0
    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_writeback_valid.value:
            logger.info("Writeback valid")
            # Fixed two-cycle response latency.
            for _ in range(2):
                await RisingEdge(dut.i_clk)
            # One-clock-wide done pulse.
            dut.i_writeback_done.value = 1
            await RisingEdge(dut.i_clk)
            dut.i_writeback_done.value = 0
async def handle_bus_interface(dut):
    """Answer memory requests from the DUT with a fixed latency and response.

    Drives ``i_memory_done`` and ``i_memory_resp`` to 0, then loops forever:
    when ``o_memory_valid`` is seen at a clock edge, waits two more edges and
    presents ``i_memory_done = 1`` with ``i_memory_resp = 2`` for one clock,
    then clears both. The requested address and command are not inspected and
    ``i_memory_data`` is never driven here.
    """
    dut.i_memory_done.value = 0
    dut.i_memory_resp.value = 0
    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_memory_valid.value:
            logger.debug("Bus Interface Access")
            # Fixed two-cycle response latency.
            for _ in range(2):
                await RisingEdge(dut.i_clk)
            dut.i_memory_done.value = 1
            # NOTE(review): 2 presumably encodes CACHE_RSP_EXCLUSIVE in the
            # RTL's cache_resp_e - confirm against the package.
            dut.i_memory_resp.value = 2
            await RisingEdge(dut.i_clk)
            dut.i_memory_done.value = 0
            dut.i_memory_resp.value = 0
@cocotb.test
async def test_sanity(dut):
    """Issue 32 back-to-back reads (sets 0..31, tag 0) against preloaded lines.

    Every set of every way is preloaded as EXCLUSIVE with tag 0 and all-zero
    data after reset. Only the data/metadata array model is started; no LRU,
    writeback or bus model runs in this test. No read data is checked.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    cocotb.start_soon(handle_cache_arrays(dut))

    # Hold reset for ten clocks, release it, then wait for the DUT to raise
    # o_rdy.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    # Preload the model after the wait on o_rdy, so these entries are not
    # overwritten by array writes the DUT issued before it became ready.
    for way in range(WAYS):
        for index in range(SETS):
            write_cacheline(index, way, bytes([0] * 64), MesiState.MESI_EXCLUSIVE, 0)

    # Upper bound on how long a single request may be stalled. Nothing in this
    # test can complete a miss, so a long stall means something went wrong.
    max_stall_cycles = 100

    for i in range(32):
        # Wait for the DUT to accept a request. The previous `continue` here
        # consumed no simulation time, so once o_rdy was sampled low every
        # remaining request was silently skipped.
        stalled = 0
        while not dut.o_rdy.value:
            assert stalled < max_stall_cycles, (
                f"o_rdy stayed low for {max_stall_cycles} cycles before read {i}"
            )
            await RisingEdge(dut.i_clk)
            stalled += 1

        dut.i_cpu_tag.value = 0
        dut.i_cpu_index.value = i
        dut.i_cpu_offset.value = 0
        dut.i_rdy.value = 1
        dut.i_cpu_we.value = 0
        await RisingEdge(dut.i_clk)
@cocotb.test
async def test_clean_eviction(dut):
    """Read a tag that is absent from a set whose four ways hold SHARED lines.

    Stimulus only: the test drives one read request and lets the simulation
    run for 1 us; it makes no assertions.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    # Start the behavioural models in the same order as the other tests.
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    # Ten clocks of reset, then wait for the DUT to report ready.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 2
    # Fill every way of the set with 0xaa data in SHARED state; way w gets
    # tag w + 1, so tags 1..4 are resident.
    line = bytes([0xaa] * 64)
    for way in range(WAYS):
        write_cacheline(INDEX, way, line, MesiState.MESI_SHARED, way+1)

    # Read request: the index/offset go out first with tag 0, and tag 0xaa
    # (not resident) is driven one clock later.
    dut.i_cpu_tag.value = 0x0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 0
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = 0xaa
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = 0

    # Let the DUT and the models run.
    await Timer(1, "us")
@cocotb.test
async def test_eviction(dut):
    """Read a tag that is absent from a set whose four ways hold MODIFIED lines.

    Same stimulus as the clean-eviction test, but the resident lines are
    dirty. Stimulus only: no assertions are made.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    # Start the behavioural models in the same order as the other tests.
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    # Ten clocks of reset, then wait for the DUT to report ready.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 2
    # Fill every way of the set with 0xaa data in MODIFIED state; way w gets
    # tag w + 1, so tags 1..4 are resident.
    line = bytes([0xaa] * 64)
    for way in range(WAYS):
        write_cacheline(INDEX, way, line, MesiState.MESI_MODIFIED, way+1)

    # Read request: the index/offset go out first with tag 0, and tag 0xaa
    # (not resident) is driven one clock later.
    dut.i_cpu_tag.value = 0x0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 0
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = 0xaa
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = 0

    # Let the DUT and the models run.
    await Timer(1, "us")
@cocotb.test
async def test_request_ownership(dut):
    """Write to a tag that is resident in SHARED state.

    Tags 1..4 are preloaded as SHARED in one set and a write with tag 2 is
    issued to that set. Stimulus only: no assertions are made.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    # Start the behavioural models in the same order as the other tests.
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    # Ten clocks of reset, then wait for the DUT to report ready.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 2
    # Way w holds tag w + 1 in SHARED state, with 0xaa data.
    line = bytes([0xaa] * 64)
    for way in range(WAYS):
        write_cacheline(INDEX, way, line, MesiState.MESI_SHARED, way+1)

    # Write request: index, offset and write data go out first with tag 0;
    # tag 2 is driven one clock later while the data is cleared.
    dut.i_cpu_tag.value = 0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_cpu_data.value = 0xaa
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 1
    await RisingEdge(dut.i_clk)
    dut.i_cpu_data.value = 0
    dut.i_cpu_tag.value = 2
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = 0

    # Let the DUT and the models run.
    await Timer(1, "us")
@cocotb.test
async def test_way_read_thrash(dut):
    """Read 32 different tags from set 0, waiting for ready after each one.

    With more tags than ways mapping to one set, the requests are expected to
    keep replacing lines. Stimulus only: no assertions are made.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    cocotb.start_soon(handle_cache_arrays(dut))
    cocotb.start_soon(handle_lru_arrays(dut))
    cocotb.start_soon(handle_writeback(dut))
    cocotb.start_soon(handle_bus_interface(dut))

    # Ten clocks of reset, then wait for the DUT to report ready.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    # This is a read test, so drive the write enable low explicitly. It was
    # previously left untouched here, so the DUT saw whatever an earlier test
    # had left on it (or an undriven input).
    dut.i_cpu_we.value = 0

    for tag in range(32):
        dut.i_cpu_tag.value = tag
        dut.i_cpu_index.value = 0
        dut.i_cpu_offset.value = 0
        dut.i_rdy.value = 1
        await RisingEdge(dut.i_clk)
        # Hold the request until the DUT is ready for the next one.
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

    # Let any outstanding activity drain.
    await Timer(1, "us")

View File

@@ -5,3 +5,9 @@ tests:
- "application_wrapper_cache_arrays_test"
sources: "sources.list"
waves: True
- name: "application_wrapper_cache_miss_handler_test"
toplevel: "application_wrapper_cache_miss_handler"
modules:
- "application_wrapper_cache_miss_handler_test"
sources: "sources.list"
waves: True

View File

@@ -1,3 +1,5 @@
import application_wrapper_cache_pkg::*;
module application_wrapper_cache_miss_handler #(
parameter NUM_WAYS = 4,
parameter NUM_SETS = 64,
@@ -7,7 +9,7 @@ module application_wrapper_cache_miss_handler #(
localparam OFFSET_W = 6,
localparam INDEX_W = $clog2(NUM_SETS),
localparam TAG_W = 32 - INDEX_W - OFFSET_W,
localparam LRU_W = NUM_WAYS-1
localparam LRU_W = NUM_WAYS-1,
localparam META_W = TAG_W + 2
) (
@@ -41,48 +43,89 @@ module application_wrapper_cache_miss_handler #(
output logic [INDEX_W-1:0] o_lru_read_index,
output logic o_lru_read_valid,
input logic [LRU_W-1:0]] i_lru_read_data,
input logic [LRU_W-1:0] i_lru_read_data,
output logic [INDEX_W-1:0] o_lru_write_index,
output logic o_lru_write_valid,
output logic [LRU_W-1:0]] o_lru_write_data,
output logic [LRU_W-1:0] o_lru_write_data,
output logic [DATA_W-1:0] o_writeback_data,
output logic [31:0] o_writeback_addr,
output logic o_writeback_valid,
input logic i_writeback_done,
output logic [31:0] o_memory_addr,
output logic o_memory_valid,
output cache_cmd_e o_memory_cmd,
input logic [DATA_W-1:0] i_memory_data,
input logic i_memory_done,
input cache_resp_e i_memory_resp
);
enum logic [3:0] {
RESET,
CLEAR_MEMORY,
IDLE,
CHECK_VICTIM,
WRITEBACK,
WAIT_WRITEBACK_ACK,
REQUEST_MEMORY,
WAIT_MEMORY,
INSTALL_LINE,
UPDATE_LRU,
REQUEST_OWNERSHIP
} state, state_next;
logic cpu_we_d1;
logic cpu_i_data_d1;
logic [INDEX_W-1:0] clear_index, clear_index_next;
logic [INDEX_W-1:0] cpu_index_d1;
logic [OFFSET_W-1:0] cpu_offset_d1;
logic cpu_we_d1;
logic [CPU_W-1:0] cpu_i_data_d1;
logic [TAG_W-1:0] cpu_tag_d1;
logic [INDEX_W-1:0] cpu_index_d1, cpu_index_d2;
logic [OFFSET_W-1:0] cpu_offset_d1, cpu_offset_d2;
logic [TAG_W-1:0] cpu_tag_new, cpu_tag_new_next;
logic [INDEX_W-1:0] cpu_index_new, cpu_index_new_next;
logic [OFFSET_W-1:0] cpu_offset_new, cpu_offset_new_next;
logic [$clog2(NUM_WAYS)-1:0] cpu_way_new, cpu_way_new_next;
logic [7:0] cpu_data_new, cpu_data_new_next;
logic previous_was_valid, previous_was_valid_next;
logic way_match_found;
logic [NUM_WAYS-1:0] way_select_mask;
logic [$clog2(NUM_WAYS)-1:0] way_select_idx;
mesi_e mesi;
logic [TAG_W-1:0] tag;
logic [31:0] read_req_addr, read_req_addr_next;
always_ff @(posedge i_clk) begin
if (i_rst) begin
state <= IDLE;
state <= RESET;
end else begin
state <= state_next;
end
previous_was_valid <= previous_was_valid_next;
read_req_addr <= read_req_addr_next;
cpu_offset_new <= cpu_offset_new_next;
cpu_index_new <= cpu_index_new_next;
cpu_tag_new <= cpu_tag_new_next;
cpu_way_new <= cpu_way_new_next;
cpu_data_new <= cpu_data_new_next;
clear_index <= clear_index_next;
cpu_we_d1 <= i_cpu_we;
cpu_i_data_d1 <= i_cpu_data
cpu_i_data_d1 <= i_cpu_data;
cpu_index_d1 <= i_cpu_index;
cpu_index_d2 <= cpu_index_d1;
cpu_tag_d1 <= i_cpu_tag;
cpu_offset_d1 <= i_cpu_offset;
cpu_offset_d2 <= cpu_offset_d1;
end
always_comb begin
@@ -103,30 +146,72 @@ always_comb begin
o_lru_write_index = '0;
o_lru_write_data = '0;
o_writeback_data = '0;
o_writeback_addr = '0;
o_writeback_valid = '0;
o_memory_addr = '0;
o_memory_valid = '0;
o_memory_cmd = CACHE_CMD_NONE;
way_match_found = '0;
way_select_mask = '0;
way_select_idx = '0;
mesi = MESI_INVALID;
tag = '0;
cpu_offset_new_next = cpu_offset_new;
cpu_index_new_next = cpu_index_new;
cpu_tag_new_next = cpu_tag_new;
cpu_way_new_next = cpu_way_new;
cpu_data_new_next = cpu_data_new;
read_req_addr_next = read_req_addr;
clear_index_next = clear_index;
previous_was_valid_next = previous_was_valid;
state_next = state;
case (state)
RESET: begin
state_next = CLEAR_MEMORY;
clear_index_next = '0;
previous_was_valid_next = '0;
end
CLEAR_MEMORY: begin
o_write_valid = '1;
o_write_data = '0;
o_write_meta = {MESI_INVALID, (TAG_W)'('0)};
o_write_index = clear_index;
o_lru_write_index = clear_index;
o_lru_write_data = '0;
o_lru_write_valid = '1;
clear_index_next = clear_index + 1;
if (clear_index_next == '0) begin
state_next = IDLE;
end
end
IDLE: begin
// by default, o_rdy is 1 unless something is wrong
o_rdy = '1;
// Read from arrays
o_read_index = i_cpu_index;
o_read_valid = i_rdy;
o_lru_read_index = i_cpu_index;
o_lru_read_valid = i_rdy;
if (previous_was_valid) begin
// data from previous cycle that was read from arrays
way_match_found = '0;
way_select_mask = '0;
for (int i; i < NUM_WAYS; i++) begin
for (int i = 0; i < NUM_WAYS; i++) begin
{mesi, tag} = i_read_meta[i];
if (tag == i_cpu_tag && mesi != MESI_INVALID) begin
way_match_found = '1;
way_select_mask[i] = '1;
way_select_idx = 2'(i);
break;
end
end
@@ -139,19 +224,30 @@ always_comb begin
// If we are in S then we need to request ownership before
// we can modify it.
if (mesi == MESI_MODIFIED || mesi == MESI_EXCLUSIVE) begin
o_write_data = i_read_data;
o_write_data[cpu_offset_d1 +: 8] = cpu_i_data_d1;
o_write_data = i_read_data[way_select_idx];
o_write_data[cpu_offset_d1*8 +: CPU_W] = cpu_i_data_d1;
o_write_meta = {MESI_MODIFIED, i_cpu_tag};
o_write_valid = way_select_mask;
o_write_index = cpu_index_d1;
end else begin
o_rdy = '0;
o_memory_addr = {i_cpu_tag, cpu_index_d1, (OFFSET_W)'('0)};
o_memory_cmd = CACHE_CMD_CLEAN_UNIQUE;
o_memory_valid = '1;
cpu_offset_new_next = cpu_offset_d1;
cpu_index_new_next = cpu_index_d1;
cpu_tag_new_next = i_cpu_tag;
cpu_way_new_next = way_select_idx;
cpu_data_new_next = cpu_i_data_d1;
state_next = REQUEST_OWNERSHIP;
end
end else begin
// Send the data to the CPU
o_cpu_data = i_read_data[cpu_offset_d1 +: 8];
o_cpu_data = i_read_data[way_select_idx][cpu_offset_d1*8 +: CPU_W];
end
// update lru
@@ -181,12 +277,188 @@ always_comb begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '0;
end
endcase
default: begin
end
endcase
end else begin
o_rdy = '0;
state_next = CHECK_VICTIM;
end
end
// Read from arrays
o_read_index = i_cpu_index;
o_read_valid = i_rdy & o_rdy;
o_lru_read_index = i_cpu_index;
o_lru_read_valid = i_rdy & o_rdy;
previous_was_valid_next = '1;
end
REQUEST_OWNERSHIP: begin
if (i_memory_done) begin
// write to the cacheline here.
o_write_data = i_read_data[cpu_way_new];
o_write_data[cpu_offset_new*8 +: CPU_W] = cpu_data_new;
o_write_meta = {MESI_MODIFIED, cpu_tag_new};
o_write_valid = (1 << cpu_way_new);
o_write_index = cpu_index_new;
state_next = IDLE;
// update lru
// start by copying the read data, then change the bits
// based on what we matched.
o_lru_write_index = cpu_index_new;
o_lru_write_data = i_lru_read_data;
o_lru_write_valid = '1;
case (1 << cpu_way_new)
4'b0001: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '1;
end
4'b0010: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '0;
end
4'b0100: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '1;
end
4'b1000: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '0;
end
default: begin
end
endcase
end
end
CHECK_VICTIM: begin
// first use the LRU, then overwrite if there was an invalid way
way_select_idx[0] = i_lru_read_data[0];
way_select_idx[1] = way_select_idx[0] ? i_lru_read_data[2] : i_lru_read_data[1];
for (int i = 0; i < NUM_WAYS; i++) begin
{mesi, tag} = i_read_meta[i];
if (mesi == MESI_INVALID) begin
way_select_idx = 2'(i);
break;
end
end
{mesi, tag} = i_read_meta[way_select_idx];
if (mesi == MESI_MODIFIED) begin
o_writeback_data = i_read_data[way_select_idx];
o_writeback_addr = {tag, cpu_index_d2, (OFFSET_W)'('0)};
o_writeback_valid = '1;
state_next = WAIT_WRITEBACK_ACK;
end else if (mesi == MESI_EXCLUSIVE || mesi == MESI_SHARED) begin
o_memory_addr = {tag, cpu_index_d2, (OFFSET_W)'('0)};
o_memory_valid = '1;
o_memory_cmd = CACHE_CMD_EVICT;
state_next = WAIT_WRITEBACK_ACK;
end else begin
state_next = REQUEST_MEMORY;
end
read_req_addr_next = {cpu_tag_d1, cpu_index_d2, (OFFSET_W)'('0)};
cpu_offset_new_next = cpu_offset_d2;
cpu_index_new_next = cpu_index_d2;
cpu_tag_new_next = cpu_tag_d1;
cpu_way_new_next = way_select_idx;
end
WAIT_WRITEBACK_ACK: begin
// This state is also used when sending the EVICT command,
// before sending the read.
if (i_writeback_done || i_memory_done) begin
state_next = REQUEST_MEMORY;
end
end
REQUEST_MEMORY: begin
// This state can be put into WAIT_WRITEBACK_ACK and CHECK_VICTIM
o_memory_addr = read_req_addr;
o_memory_valid = '1;
// if the cache hit was a write, we should read unique, so we can be
// sure that we are given EXCLUSIVE and can set it to MODIFIED right away
o_memory_cmd = CACHE_CMD_READ;
state_next = WAIT_MEMORY;
end
WAIT_MEMORY: begin
// need to handle if this was a write miss
if (i_memory_done) begin
o_write_valid = (1 << cpu_way_new);
o_write_data = i_memory_data;
o_write_index = cpu_index_new;
if (i_memory_resp == CACHE_RSP_SHARED) begin
o_write_meta = {MESI_SHARED, cpu_tag_new};
end else if (i_memory_resp == CACHE_RSP_EXCLUSIVE) begin
o_write_meta = {MESI_EXCLUSIVE, cpu_tag_new};
end
o_rdy = '1;
o_cpu_data = i_memory_data[cpu_offset_new*8 +: CPU_W];
// update lru
// start by copying the read data, then change the bits
// based on what we matched.
o_lru_write_index = cpu_index_new;
o_lru_write_data = i_lru_read_data;
o_lru_write_valid = '1;
case (1 << cpu_way_new)
4'b0001: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '1;
end
4'b0010: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '0;
end
4'b0100: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '1;
end
4'b1000: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '0;
end
default: begin
end
endcase
o_read_index = i_cpu_index;
o_read_valid = i_rdy & o_rdy;
o_lru_read_index = i_cpu_index;
o_lru_read_valid = i_rdy & o_rdy;
state_next = IDLE;
end
end
default: begin
state_next = IDLE;
end
endcase
end
endmodule

View File

@@ -11,18 +11,25 @@ package application_wrapper_cache_pkg;
} page_table_entry_t;
typedef enum logic [2:0] {
CACHE_NONE,
CACHE_READ_SHARED,
CACHE_READ_UNIQUE,
CACHE_WRITE,
CACHE_CLEAN_UNIQUE
CACHE_CMD_NONE,
CACHE_CMD_READ,
CAHCE_CMD_READ_UNIQUE, // NOTE(review): "CAHCE" looks like a typo for "CACHE" (sibling enumerators are CACHE_CMD_*); rename together with any users
CACHE_CMD_WRITE,
CACHE_CMD_CLEAN_UNIQUE,
CACHE_CMD_EVICT
} cache_cmd_e;
typedef enum logic [1:0] {
MESI_MODIFIED,
MESI_EXCLUSIVE,
CACHE_RSP_NONE,
CACHE_RSP_SHARED,
CACHE_RSP_EXCLUSIVE
} cache_resp_e;
typedef enum logic [1:0] {
MESI_INVALID,
MESI_SHARED,
MESI_INVALID
MESI_EXCLUSIVE,
MESI_MODIFIED
} mesi_e;
endpackage

View File

@@ -1,5 +1,6 @@
cache/application_wrapper_cache_pkg.sv
cache/application_wrapper_cache_arrays.sv
cache/application_wrapper_cache_miss_handler.sv
cache/application_wrapper_mmu.sv
cache/application_wrapper_cache_top.sv