Add cache arrays and test

This commit is contained in:
2026-05-22 22:27:53 -07:00
parent 3ea31e40aa
commit df25550c8a
6 changed files with 288 additions and 427 deletions

View File

@@ -0,0 +1,205 @@
import cocotb
from cocotb.handle import LogicArray
from cocotb.clock import Clock
from cocotb.triggers import ReadOnly, NextTimeStep, RisingEdge
import logging
import random
# Testbench geometry; SETS and WAYS match the DUT's NUM_SETS / NUM_WAYS
# defaults. CLK_PERIOD is passed to Clock() together with unit="ns".
CLK_PERIOD = 5
SETS = 64
WAYS = 4

# The tests log through the root logger, raised to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Software model of the DUT arrays: one dict per way, keyed by set index.
data_arrays = [{} for _ in range(WAYS)]
meta_arrays = [{} for _ in range(WAYS)]
async def _fill_arrays(dut, port):
    """Write fresh random data/meta to every (set, way) through one write port.

    ``port`` is the signal-name infix (``"cpu"`` or ``"snoop"``).  Every value
    driven into the DUT is also stored in the module-level ``data_arrays`` /
    ``meta_arrays`` model so later reads can be checked against it.
    """
    write_data = getattr(dut, f"i_{port}_write_data")
    write_meta = getattr(dut, f"i_{port}_write_meta")
    write_index = getattr(dut, f"i_{port}_write_index")
    write_valid = getattr(dut, f"i_{port}_write_valid")

    for index in range(SETS):
        for way in range(WAYS):
            data = random.randbytes(64)
            meta = random.randint(0, 2**22 - 1)
            data_arrays[way][index] = data
            meta_arrays[way][index] = meta

            write_data.value = LogicArray.from_bytes(data, byteorder="little")
            write_meta.value = meta
            write_index.value = index
            # One write-enable bit per way.
            write_valid.value = 1 << way
            await RisingEdge(dut.i_clk)

    write_valid.value = 0
    await RisingEdge(dut.i_clk)


async def _check_arrays(dut, port):
    """Read every set back through one read port and compare with the model.

    Returns the number of mismatching sets (data and meta counted separately)
    so the caller can fail the test after the whole sweep has been logged.
    """
    read_index = getattr(dut, f"i_{port}_read_index")
    read_valid = getattr(dut, f"i_{port}_read_valid")
    read_data = getattr(dut, f"o_{port}_read_data")
    read_meta = getattr(dut, f"o_{port}_read_meta")

    mismatches = 0
    for index in range(SETS):
        read_index.value = index
        read_valid.value = 1
        await RisingEdge(dut.i_clk)
        # Sample only after the outputs for this edge have settled.
        await ReadOnly()

        data_bytes = [v.to_bytes(byteorder="little") for v in read_data.value]
        meta = [int(m) for m in read_meta.value]
        expected_data = [data_arrays[way][index] for way in range(WAYS)]
        expected_meta = [meta_arrays[way][index] for way in range(WAYS)]

        if data_bytes != expected_data:
            logger.error("Data Error (%s port, set %d)", port, index)
            mismatches += 1
        if meta != expected_meta:
            logger.error("Meta Error (%s port, set %d)", port, index)
            mismatches += 1

        # Signals cannot be driven in the read-only phase; step out of it
        # before the next iteration writes the read index.
        await NextTimeStep()

    read_valid.value = 0
    return mismatches


@cocotb.test
async def test_sanity(dut):
    """Fill and read back the whole array, first via the CPU ports, then snoop.

    Each phase writes a new random value to every (set, way) and then reads
    every set back, comparing all ways against the software model.  All
    mismatches are logged, and the test fails at the end if there were any
    (previously mismatches were only logged at INFO and the test always
    passed).
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    await RisingEdge(dut.i_clk)
    await RisingEdge(dut.i_clk)

    mismatches = 0
    for port in ("cpu", "snoop"):
        await _fill_arrays(dut, port)
        mismatches += await _check_arrays(dut, port)

    assert mismatches == 0, f"{mismatches} read-back mismatch(es), see log"
def _check_random_read(label, data_handle, meta_handle, way, index):
    """Compare one way of a read port's outputs with the software model.

    Returns the number of mismatches found (0, 1 or 2).  Entries the model
    has never written are skipped: the DUT has no reset, so their contents
    are not known to the testbench.
    """
    expected_data = data_arrays[way].get(index)
    expected_meta = meta_arrays[way].get(index)
    if expected_data is None or expected_meta is None:
        return 0

    mismatches = 0
    actual_data = data_handle.value[way].to_bytes(byteorder="little")
    actual_meta = int(meta_handle.value[way])
    if actual_data != expected_data:
        logger.error("%s Data Error (set %d, way %d)", label, index, way)
        mismatches += 1
    if actual_meta != expected_meta:
        logger.error("%s Meta Error (set %d, way %d)", label, index, way)
        mismatches += 1
    return mismatches


@cocotb.test
async def test_random_access(dut):
    """Drive all four ports with random traffic every cycle and check reads.

    Each iteration issues one CPU write and one snoop write to two different
    (set, way) entries, plus one CPU read and one snoop read of random sets.
    The model is updated before the reads are checked, so a read that hits an
    entry written in the same cycle expects the new value.  All mismatches
    are logged, and the test fails at the end if there were any.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    ITERS = 1024
    mismatches = 0

    for _ in range(ITERS):
        cpu_write_way = random.randint(0, WAYS - 1)
        cpu_write_set = random.randint(0, SETS - 1)
        # Pick a snoop target that is not the very same entry as the CPU
        # target.  Sharing only the set or only the way is allowed (the old
        # `and` condition rejected those pairs as well).
        while True:
            snoop_write_way = random.randint(0, WAYS - 1)
            snoop_write_set = random.randint(0, SETS - 1)
            if (snoop_write_way, snoop_write_set) != (cpu_write_way, cpu_write_set):
                break

        cpu_write_data = random.randbytes(64)
        cpu_write_meta = random.randint(0, 2**22 - 1)
        snoop_write_data = random.randbytes(64)
        snoop_write_meta = random.randint(0, 2**22 - 1)

        data_arrays[cpu_write_way][cpu_write_set] = cpu_write_data
        meta_arrays[cpu_write_way][cpu_write_set] = cpu_write_meta
        data_arrays[snoop_write_way][snoop_write_set] = snoop_write_data
        meta_arrays[snoop_write_way][snoop_write_set] = snoop_write_meta

        dut.i_cpu_write_data.value = LogicArray.from_bytes(cpu_write_data, byteorder="little")
        dut.i_cpu_write_meta.value = cpu_write_meta
        dut.i_cpu_write_index.value = cpu_write_set
        dut.i_cpu_write_valid.value = 1 << cpu_write_way

        dut.i_snoop_write_data.value = LogicArray.from_bytes(snoop_write_data, byteorder="little")
        dut.i_snoop_write_meta.value = snoop_write_meta
        dut.i_snoop_write_index.value = snoop_write_set
        dut.i_snoop_write_valid.value = 1 << snoop_write_way

        cpu_read_way = random.randint(0, WAYS - 1)
        cpu_read_set = random.randint(0, SETS - 1)
        snoop_read_way = random.randint(0, WAYS - 1)
        snoop_read_set = random.randint(0, SETS - 1)

        dut.i_cpu_read_index.value = cpu_read_set
        dut.i_snoop_read_index.value = snoop_read_set
        dut.i_cpu_read_valid.value = 1
        dut.i_snoop_read_valid.value = 1

        await RisingEdge(dut.i_clk)
        # Sample only after the outputs for this edge have settled.
        await ReadOnly()

        mismatches += _check_random_read(
            "CPU", dut.o_cpu_read_data, dut.o_cpu_read_meta,
            cpu_read_way, cpu_read_set,
        )
        mismatches += _check_random_read(
            "snoop", dut.o_snoop_read_data, dut.o_snoop_read_meta,
            snoop_read_way, snoop_read_set,
        )

        # Leave the read-only phase before the next iteration drives signals.
        await NextTimeStep()

    # Return every strobe to idle, as test_sanity does after each phase.
    dut.i_cpu_write_valid.value = 0
    dut.i_snoop_write_valid.value = 0
    dut.i_cpu_read_valid.value = 0
    dut.i_snoop_read_valid.value = 0

    assert mismatches == 0, f"{mismatches} random-access mismatch(es), see log"

View File

@@ -1,7 +1,7 @@
tests:
- name: "application_wrapper_cache_l1_test"
toplevel: "application_wrapper_cache_l1"
- name: "application_wrapper_cache_arrays_test"
toplevel: "application_wrapper_cache_arrays"
modules:
- "application_wrapper_cache_l1_test"
- "application_wrapper_cache_arrays_test"
sources: "sources.list"
waves: True

View File

@@ -0,0 +1,72 @@
// Storage for a set-associative cache: a data array and a meta array, each
// NUM_SETS x NUM_WAYS, accessed through a CPU port pair and a snoop port
// pair (one read port and one write port each).
//
// Reads are registered. On a rising clock edge with i_*_read_valid high,
// all NUM_WAYS entries of the addressed set are copied to o_*_read_data and
// o_*_read_meta; with read_valid low the outputs keep their previous value.
//
// Writes use a per-way enable mask: bit i of i_*_write_valid writes the
// port's data/meta into way i of the addressed set. Every enabled way
// receives the same data/meta.
//
// There is no reset; an entry holds whatever was last written to it.
module application_wrapper_cache_arrays #(
parameter NUM_WAYS = 4,
parameter NUM_SETS = 64,
// 64 bytes per data entry.
localparam DATA_W = 64*8,
localparam OFFSET_W = 6,
localparam INDEX_W = $clog2(NUM_SETS),
// NOTE(review): the 32 is presumably the address width -- confirm.
localparam TAG_W = 32 - INDEX_W - OFFSET_W,
// Tag plus two extra bits.
// NOTE(review): presumably the 2-bit line state -- confirm.
localparam META_W = TAG_W + 2
) (
input logic i_clk,
// CPU read port
input logic [INDEX_W-1:0] i_cpu_read_index,
input logic i_cpu_read_valid,
output logic [DATA_W-1:0] o_cpu_read_data [NUM_WAYS],
output logic [META_W-1:0] o_cpu_read_meta [NUM_WAYS],
// CPU write port
input logic [INDEX_W-1:0] i_cpu_write_index,
input logic [NUM_WAYS-1:0] i_cpu_write_valid,
input logic [DATA_W-1:0] i_cpu_write_data,
input logic [META_W-1:0] i_cpu_write_meta,
// Snoop read port
input logic [INDEX_W-1:0] i_snoop_read_index,
input logic i_snoop_read_valid,
output logic [DATA_W-1:0] o_snoop_read_data [NUM_WAYS],
output logic [META_W-1:0] o_snoop_read_meta [NUM_WAYS],
// Snoop write port
input logic [INDEX_W-1:0] i_snoop_write_index,
input logic [NUM_WAYS-1:0] i_snoop_write_valid,
input logic [DATA_W-1:0] i_snoop_write_data,
input logic [META_W-1:0] i_snoop_write_meta
);
// memory arrays.
// In order to make these WRITE_FIRST, we put a blocking assignment
// for the write data before the assignment to the read data
logic [DATA_W-1:0] data_arrays [NUM_SETS][NUM_WAYS];
logic [META_W-1:0] meta_arrays [NUM_SETS][NUM_WAYS];
// Statement order inside this block is significant:
//  * writes precede reads, so a read of an entry written on the same edge
//    returns the new value;
//  * the snoop write follows the CPU write, so if both target the same set
//    and way on the same edge the snoop value is the one that is stored.
// NOTE(review): the outputs are driven with blocking assignments from a
// clocked block; logic that samples them on the same clock edge may race in
// simulation -- confirm this is acceptable for the consumers.
always @(posedge i_clk) begin
for (int i = 0; i < NUM_WAYS; i++) begin
if (i_cpu_write_valid[i]) begin
data_arrays[i_cpu_write_index][i] = i_cpu_write_data;
meta_arrays[i_cpu_write_index][i] = i_cpu_write_meta;
end
if (i_snoop_write_valid[i]) begin
data_arrays[i_snoop_write_index][i] = i_snoop_write_data;
meta_arrays[i_snoop_write_index][i] = i_snoop_write_meta;
end
end
// Each read copies the whole addressed set (all ways) in one assignment.
if (i_cpu_read_valid) begin
o_cpu_read_data = data_arrays[i_cpu_read_index];
o_cpu_read_meta = meta_arrays[i_cpu_read_index];
end
if (i_snoop_read_valid) begin
o_snoop_read_data = data_arrays[i_snoop_read_index];
o_snoop_read_meta = meta_arrays[i_snoop_read_index];
end
end
endmodule

View File

@@ -1,423 +0,0 @@
import application_wrapper_cache_pkg::*;

// Direct-mapped L1 cache with a byte-wide CPU port and a cacheline-wide port
// to the next cache level. The cache is virtually indexed (index and offset
// come from i_addr) and physically tagged (the tag comes from
// i_phys_address).
//
// State machine:
//   RESET  -> CLEAR  : entered on i_rst.
//   CLEAR  -> IDLE   : walks every line, writing zero data and a zeroed
//                      (invalid) meta tag.
//   IDLE   -> READY  : loads the line addressed by the current index.
//   READY            : hit  -> serve the read, or merge the latched write
//                              byte into the line;
//                      miss on an invalid or clean line -> READ;
//                      miss on a dirty line -> EVICT, then READ;
//                      write to a shared line -> WAIT_CLEAN_UNIQUE.
//   EVICT  -> READ   : writes the old line back (CACHE_WRITE).
//   READ   -> READY  : fetches the new line (READ_UNIQUE for a pending
//                      write, READ_SHARED otherwise).
//
// NOTE(review): i_sync, i_rdy and i_mmu_valid are not referenced anywhere in
// this module, and i_table_entry is declared as an output (despite the i_
// prefix) and is never driven -- confirm the intended directions and use.
module application_wrapper_cache_l1 #(
    parameter CACHELINE_SIZE = 64,
    parameter CACHELINE_COUNT = 64,
    localparam ADDR_WIDTH = 32
)(
    input logic i_clk,
    input logic i_rst,

    /* CPU Interface */
    input logic [ADDR_WIDTH-1:0] i_addr,
    input logic i_we,
    input logic i_sync,
    input logic [7:0] i_data,
    output logic [7:0] o_data,
    input logic i_rdy,
    output logic o_rdy,

    /* MMU Interface */
    input logic [ADDR_WIDTH-1:0] i_phys_address,
    output page_table_entry_t i_table_entry,
    input logic i_mmu_valid,

    /* Higher level cache interface */
    output logic [ADDR_WIDTH-1:0] o_cache_addr,
    output cache_cmd_e o_cache_cmd,
    output logic o_cache_valid,
    output logic [CACHELINE_SIZE*8-1:0] o_cache_data,
    input logic [CACHELINE_SIZE*8-1:0] i_cache_data,
    input logic i_cache_rdy
);

    // We have 32-bit addresses, 64-byte cache lines, and 64 total lines.
    // That is 6 bits for offset, 6 bits for index, and 20 bits for tag.
    // The cache is virtually indexed, physically tagged.
    localparam OFFSET_W = $clog2(CACHELINE_SIZE);
    localparam INDEX_W = $clog2(CACHELINE_COUNT);
    localparam TAG_W = ADDR_WIDTH - INDEX_W - OFFSET_W;
    localparam META_W = 3; // valid, shared, clean

    // Per-line bookkeeping stored next to the data.
    typedef struct {
        logic [TAG_W-1:0] tag;
        logic valid;
        logic shared;
        logic clean;
    } meta_tag_t;

    // Address fields of the current request and their delayed copies.
    logic [OFFSET_W-1:0] offset, offset_d1;
    logic [INDEX_W-1:0] index, index_d1, index_d2;
    logic [TAG_W-1:0] tag, tag_d1;

    // Cacheline size is in bytes, not bits.
    // Direct mapped cache: read one line so the data is ready if it is a hit.
    logic [CACHELINE_SIZE*8-1:0] data_array [CACHELINE_COUNT];
    meta_tag_t meta_tag_array [CACHELINE_COUNT];

    // Line currently held in the pipeline register.
    logic [CACHELINE_SIZE*8-1:0] current_data, current_data_next, write_data_prev;
    meta_tag_t current_meta_tag, current_meta_tag_next;

    // Request captured on a miss, used while talking to the next level.
    logic [OFFSET_W-1:0] read_offset, read_offset_next;
    logic [INDEX_W-1:0] read_index, read_index_next;
    logic [ADDR_WIDTH-1:0] read_address, read_address_next;
    logic [ADDR_WIDTH-1:0] write_address, write_address_next;

    // Array write port, driven combinationally by the state machine.
    logic [CACHELINE_SIZE*8-1:0] write_data;
    meta_tag_t write_meta_tag;
    logic [INDEX_W-1:0] write_index;
    logic data_write_enable;
    logic meta_tag_write_enable;

    logic we_d1;
    logic latched_we, latched_we_next;
    logic [7:0] latched_data, latched_data_next;
    logic [7:0] data_d1;

    // performance counters
    logic [31:0] eviction_count, eviction_count_next;
    logic [31:0] cache_miss_count, cache_miss_count_next;

    // reset counter
    logic [INDEX_W-1:0] clear_counter, clear_counter_next;

    enum logic [2:0] {RESET, CLEAR, IDLE, READY, EVICT, READ, WAIT_CLEAN_UNIQUE} prev_state, state, state_next;

    // Sequential state and the array write port.
    // NOTE(review): several registers updated in the else-branch
    // (prev_state, current_meta_tag, read_offset, read_index, index_d2,
    // we_d1, data_d1, write_data_prev) have no reset value -- confirm that
    // is intended.
    always_ff @(posedge i_clk) begin
        if (i_rst) begin
            state <= RESET;
            current_data <= '0;
            tag_d1 <= '0;
            index_d1 <= '0;
            offset_d1 <= '0;
            read_address <= '0;
            write_address <= '0;
            latched_we <= '0;
            latched_data <= '0;
            eviction_count <= '0;
            cache_miss_count <= '0;
            clear_counter <= '0;
        end else begin
            prev_state <= state;
            state <= state_next;
            current_data <= current_data_next;
            write_data_prev <= write_data;
            current_meta_tag <= current_meta_tag_next;
            read_offset <= read_offset_next;
            read_index <= read_index_next;
            read_address <= read_address_next;
            write_address <= write_address_next;

            if (data_write_enable) begin
                data_array[write_index] <= write_data;
            end
            if (meta_tag_write_enable) begin
                meta_tag_array[write_index] <= write_meta_tag;
            end

            tag_d1 <= tag;
            index_d1 <= index;
            index_d2 <= index_d1;
            offset_d1 <= offset;
            we_d1 <= i_we;
            data_d1 <= i_data;
            latched_we <= latched_we_next;
            latched_data <= latched_data_next;
            eviction_count <= eviction_count_next;
            cache_miss_count <= cache_miss_count_next;
            clear_counter <= clear_counter_next;
        end
    end

    // Next-state and output logic. Every signal gets a default first, so
    // each state only lists what it changes.
    always_comb begin
        state_next = state;
        current_data_next = current_data;
        current_meta_tag_next = current_meta_tag;
        read_offset_next = read_offset;
        read_index_next = read_index;
        read_address_next = read_address;
        write_address_next = write_address;
        latched_we_next = latched_we;
        latched_data_next = latched_data;

        o_rdy = '0;
        o_cache_valid = '0;
        o_cache_cmd = CACHE_NONE;
        o_cache_addr = '0;
        o_cache_data = '0;

        // vipt
        offset = i_addr[OFFSET_W-1:0];
        index = i_addr[INDEX_W+OFFSET_W-1:OFFSET_W];
        tag = i_phys_address[INDEX_W+OFFSET_W+TAG_W-1:INDEX_W+OFFSET_W];

        write_index = '0;
        write_data = '0;
        data_write_enable = '0;
        write_meta_tag.tag = '0;
        write_meta_tag.valid = '0;
        write_meta_tag.shared = '0;
        write_meta_tag.clean = '0;
        meta_tag_write_enable = '0;

        o_data = '0;

        eviction_count_next = eviction_count;
        cache_miss_count_next = cache_miss_count;
        clear_counter_next = clear_counter;

        case (state)
            RESET: begin
                state_next = CLEAR;
            end

            CLEAR: begin
                // Invalidate one line per cycle; leave after the last one.
                if (clear_counter == (INDEX_W)'(CACHELINE_COUNT-1)) begin
                    state_next = IDLE;
                end
                write_data = '0;
                data_write_enable = '1;
                meta_tag_write_enable = '1;
                write_meta_tag.tag = '0;
                write_meta_tag.valid = '0;
                write_meta_tag.shared = '0;
                write_meta_tag.clean = '0;
                write_index = clear_counter;
                clear_counter_next = clear_counter + 1;
            end

            IDLE: begin
                state_next = READY;
                current_data_next = data_array[index];
                current_meta_tag_next = meta_tag_array[index];
                o_rdy = '1;
            end

            READY: begin
                if (!current_meta_tag.valid || (current_meta_tag.valid && current_meta_tag.tag != tag_d1 && current_meta_tag.clean)) begin
                    // current line is not valid, just read
                    // OR current line is valid, but clean so we don't need to write it back.
                    state_next = READ;
                    read_index_next = index_d1;
                    read_offset_next = offset_d1;
                    read_address_next = {i_phys_address[31:OFFSET_W], (OFFSET_W)'('0)};
                    latched_we_next = we_d1;
                    latched_data_next = data_d1;
                    cache_miss_count_next = cache_miss_count + 1;
                end else if (current_meta_tag.valid && current_meta_tag.tag != tag_d1 && !current_meta_tag.clean) begin
                    // current line was valid, but the wrong tag and dirty:
                    // write it back before fetching the new line.
                    state_next = EVICT;
                    read_index_next = index_d1;
                    read_offset_next = offset_d1;
                    read_address_next = {i_phys_address[31:OFFSET_W], (OFFSET_W)'('0)};
                    write_address_next = {current_meta_tag.tag, index_d1, (OFFSET_W)'('0)};
                    latched_we_next = we_d1;
                    latched_data_next = data_d1;
                    cache_miss_count_next = cache_miss_count + 1;
                    eviction_count_next = eviction_count + 1;
                end else begin
                    // Hit.
                    latched_we_next = i_we;
                    latched_data_next = i_data;

                    // always be loading the next data array
                    current_data_next = data_array[index];
                    current_meta_tag_next = meta_tag_array[index];

                    // We are accessing something we just wrote to
                    if (latched_we) begin
                        // If we are writing to a shared cacheline, we must
                        // make it unique first. The shared flag lives in the
                        // meta tag; current_data is a plain bit vector and
                        // has no members (this used to read
                        // current_data.shared).
                        if (current_meta_tag.shared) begin
                            o_rdy = '0;
                            o_cache_cmd = CACHE_CLEAN_UNIQUE;
                            o_cache_valid = '1;
                            latched_we_next = we_d1;
                            latched_data_next = data_d1;
                            read_index_next = index_d1;
                            read_offset_next = offset_d1;
                            state_next = WAIT_CLEAN_UNIQUE;
                        end

                        // NOTE(review): the array write below is not guarded
                        // by the shared check above, so the byte is also
                        // written in this cycle when the line is shared --
                        // confirm that is intended.
                        write_data = current_data;
                        write_data[offset_d1*8 +: 8] = latched_data;
                        data_write_enable = '1;
                        meta_tag_write_enable = '1;
                        write_meta_tag = current_meta_tag;
                        write_meta_tag.clean = '0;
                        write_index = index_d1;

                        // Forward the merged line if the next access hits
                        // the line being written.
                        if (index == write_index) begin
                            current_data_next = write_data;
                        end
                    end else begin
                        // NOTE(review): the original code branched on
                        // (prev_state == READY && index_d1 == index_d2) for
                        // a possible RAW hazard, but both branches drove the
                        // same value; they are merged here with identical
                        // behaviour. Confirm whether a bypass (for example
                        // from write_data_prev) was intended.
                        o_data = current_data[offset_d1*8 +: 8];
                        o_rdy = '1;
                    end
                end
            end

            EVICT: begin
                o_cache_addr = write_address;
                o_cache_cmd = CACHE_WRITE;
                o_cache_valid = '1;
                o_cache_data = current_data;
                if (i_cache_rdy) begin
                    state_next = READ;
                end
            end

            READ: begin
                o_cache_addr = read_address;
                if (latched_we) begin
                    o_cache_cmd = CACHE_READ_UNIQUE;
                end else begin
                    o_cache_cmd = CACHE_READ_SHARED;
                end
                o_cache_valid = '1;

                write_index = read_index;
                write_data = i_cache_data;
                write_meta_tag.tag = read_address[31:INDEX_W+OFFSET_W];
                write_meta_tag.valid = '1;
                write_meta_tag.shared = ~latched_we; // if we are about to write, then we requested unique
                write_meta_tag.clean = ~latched_we; // if we are about to write, then mark dirty
                data_write_enable = i_cache_rdy;
                meta_tag_write_enable = i_cache_rdy;

                if (i_cache_rdy) begin
                    state_next = READY;
                    current_data_next = i_cache_data;
                    current_meta_tag_next = write_meta_tag;
                    // Replay the missed request's address fields so the
                    // delayed copies match the line just filled.
                    index = write_index;
                    tag = read_address[31:INDEX_W+OFFSET_W];
                    offset = read_offset;
                end
            end

            WAIT_CLEAN_UNIQUE: begin
                // Write data after making it unique
                if (i_cache_rdy) begin
                    // NOTE(review): write_data still holds its '0 default
                    // here, so only the latched byte is non-zero in the line
                    // that gets written -- confirm whether current_data
                    // should be merged in first.
                    write_data[offset_d1*8 +: 8] = latched_data;
                    data_write_enable = i_cache_rdy;
                    meta_tag_write_enable = i_cache_rdy;
                    write_meta_tag.tag = read_address[31:INDEX_W+OFFSET_W];
                    write_meta_tag.valid = '1;
                    write_meta_tag.shared = '0;
                    write_meta_tag.clean = '0;
                    write_index = index_d1;
                    if (index == write_index) begin
                        current_data_next = write_data;
                    end
                    state_next = READY;
                end
            end

            default: begin
                state_next = READY;
            end
        endcase
    end

    /*
    In the ready state, we read from the data array and if the line is valid
    and the tag matches with the address, we present the data to the cpu.
    Otherwise, we lower o_rdy and send the request to the higher level cache.
    If what we read was valid but the tag didn't match, then we need to evict it.
    If the line was not valid, then we don't need to evict it and can just request
    the new data.

    One thing that we also need is an MMU. The TLB can be 1 cycle, then if the TLB
    says that we are allowed to read from the cache, we can read from the cache.

    How do we handle writes? Since we take 1 cycle to read from the cache, we cannot
    immediately write to the cache line in one cycle; we will have to wait a cycle
    in order to determine if the cacheline is valid or not. To do this, we will need
    to have it be pipelined, so that we store the data temporarily while we read the
    meta_tag array, then if it is valid we write to the cache. To avoid RAW hazards, we
    also need to store the address and check if we are reading a value we just wrote.
    If so, then we return this stored value instead of reading from ram, since we would
    be reading at the same time as we are writing, and that could be undefined.

    Basically, if the index matches the previous access, then we have a hazard and need to
    use the stored cacheline instead of the cacheline we read from memory, since that hasn't
    been updated yet. We don't need to cache the meta tag since if we just wrote to it, it will
    already be dirty anyway.
    */

    /*
    In order for this to be a coherent requester, we need to adjust the protocol slightly
    * CPU Requests data to READ: Send ReadShared request
    * CPU Requests data to WRITE: Send ReadUnique request
    * CPU Writes to data already in cache: CleanUnique
    * Clean cacheline overwritten: Send Evict request
    * Dirty cacheline overwritten: Send WriteBackFull request

    The cache <-> CHI interface is handled by a separate module.
    */
endmodule

View File

@@ -18,4 +18,11 @@ package application_wrapper_cache_pkg;
CACHE_CLEAN_UNIQUE
} cache_cmd_e;
// Two-bit cache-line state. The encodings are spelled out explicitly; they
// are the same values the implicit declaration-order numbering assigns.
typedef enum logic [1:0] {
    MODIFIED  = 2'd0,
    EXCLUSIVE = 2'd1,
    SHARED    = 2'd2,
    INVALID   = 2'd3
} mesi_e;
endpackage

View File

@@ -1,5 +1,5 @@
cache/application_wrapper_cache_pkg.sv
cache/application_wrapper_cache_l1.sv
cache/application_wrapper_cache_arrays.sv
cache/application_wrapper_mmu.sv
cache/application_wrapper_cache_top.sv