Compare commits

..

6 Commits

Author SHA1 Message Date
cdbb6a9720 Get it to ACTUALLY compile :) 2026-05-24 20:30:34 -07:00
6c6c3d295b Add design doc
but I didn't really read it
2026-05-24 20:09:02 -07:00
a21cc4241a Get it to compile at least 2026-05-24 20:06:08 -07:00
151643b2ad Get it working more 2026-05-24 17:13:20 -07:00
61ee654b18 Get it roughly working 2026-05-24 15:53:50 -07:00
aa8c4a64df First shot at happy path 2026-05-22 23:54:33 -07:00
14 changed files with 1558 additions and 506 deletions

View File

@@ -1,493 +0,0 @@
import cocotb
from cocotb.handle import Immediate, LogicArray
from cocotb.simulator import get_sim_time
from cocotb.clock import Clock
from cocotb.triggers import Timer, RisingEdge, FallingEdge, with_timeout
from enum import IntEnum
from collections import defaultdict
from collections.abc import Mapping
import logging
import random
# Root logger, shared by every coroutine in this testbench. At INFO level the
# logger.debug() traces in the coroutines below are suppressed.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Clock period passed to cocotb's Clock(); the tests pair it with unit="ns".
CLK_PERIOD = 5
# Reference model of the cache under test, keyed by cache line index.
# A missing key yields an empty bytearray (defaultdict).
reference_cache_data = defaultdict(bytearray)
# Model of the next memory level, keyed by the address the DUT requests.
higher_cache_data = defaultdict(bytearray)
async def cpu_sequencer(dut, sequence: list[tuple[int, int, bool, bool]]):
    """Drive a list of CPU requests into the DUT.

    Each entry of *sequence* is ``(addr, data, we, sync)`` and is applied to
    ``i_addr``, ``i_data``, ``i_we`` and ``i_sync``.  The first entry is
    applied immediately, before waiting for the falling edge of ``i_rst``.
    Every later entry is applied on a rising clock edge at which ``o_rdy`` is
    set.  After the last entry the coroutine waits a further 150 ns before
    returning.

    Raises:
        IndexError: if *sequence* is empty.
    """

    def drive(request):
        # Apply one request tuple to the CPU-side inputs.
        addr, do, we, sync = request
        dut.i_addr.value = addr
        dut.i_data.value = do
        dut.i_we.value = we
        dut.i_sync.value = sync

    drive(sequence[0])
    await FallingEdge(dut.i_rst)
    for request in sequence[1:]:
        await RisingEdge(dut.i_clk)
        # Hold the current request until the DUT reports ready.
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)
        drive(request)
    await Timer(150, "ns")
async def cpu_data_monitor(dut):
    """Check CPU-side read data against ``reference_cache_data``.

    On every rising clock edge with ``o_rdy`` set, the request sampled on the
    previous ready edge is evaluated: a write updates the reference line, a
    read compares ``o_data`` with the reference byte.  Mismatches are only
    reported through ``logger.error``; nothing is raised.  A previous address
    of 0 is never evaluated.
    """
    address = we = i_data = 0
    await FallingEdge(dut.i_rst)
    while True:
        await RisingEdge(dut.i_clk)
        if not dut.o_rdy.value:
            continue
        # Age the last sampled request, then sample the new one.
        previous_address, previous_we, previous_i_data = address, we, i_data
        address = int(dut.i_addr.value)
        we = int(dut.i_we.value)
        i_data = int(dut.i_data.value)
        data = int(dut.o_data.value)
        if previous_address == 0:
            continue
        # 64-byte lines, 64 line slots in the reference model.
        line_number, offset = divmod(previous_address, 64)
        index = line_number % 64
        cacheline = reference_cache_data[index]
        if not previous_we:
            expected_data = cacheline[offset]
            if data != expected_data:
                logger.error(f"{get_sim_time()} {address=:x} {previous_address=:x} {data=:x} {expected_data=:x}")
            continue
        # The previous request was a write: mirror it into the reference line.
        cacheline[offset] = previous_i_data
        logger.debug(f"We saw a write here {index=} {offset=} previous_data={previous_i_data:x}")
async def mmu_sequencer(dut):
    """Copy ``i_addr`` onto ``i_phys_address`` after every rising clock edge."""
    while True:
        await RisingEdge(dut.i_clk)
        requested_address = dut.i_addr.value
        dut.i_phys_address.value = requested_address
async def handle_higher_level_cache(dut):
    """Answer the DUT's requests to the next memory level.

    Reads return the 64-byte line stored in ``higher_cache_data`` (random
    bytes the first time an address is seen) and install the same bytearray
    object in ``reference_cache_data`` at ``dut.read_index``.  Writes store
    ``o_cache_data`` in ``higher_cache_data``.  In both cases ``i_cache_rdy``
    is held high for one clock cycle.
    """

    class CacheCmd(IntEnum):
        CACHE_NONE = 0
        CACHE_READ = 1
        CACHE_WRITE = 2

    dut.i_cache_rdy.value = 0
    while True:
        await RisingEdge(dut.i_clk)
        dut.i_cache_rdy.value = 0
        if not dut.o_cache_valid.value:
            continue
        cmd = CacheCmd(dut.o_cache_cmd.value)
        addr = int(dut.o_cache_addr.value)
        logger.debug(f"{cmd=} {addr=}")
        if cmd == CacheCmd.CACHE_READ:
            if addr not in higher_cache_data:
                higher_cache_data[addr] = bytearray(random.randbytes(64))
            line = higher_cache_data[addr]
            dut.i_cache_data.value = LogicArray.from_bytes(line, byteorder="little")
            dut.i_cache_rdy.value = 1
            # Both models share this one bytearray object from here on.
            reference_cache_data[int(dut.read_index.value)] = line
        elif cmd == CacheCmd.CACHE_WRITE:
            dut.i_cache_rdy.value = 1
            written = dut.o_cache_data.value.to_bytes(byteorder="little")
            higher_cache_data[addr] = bytearray(written)
        else:
            # CACHE_NONE: nothing to acknowledge.
            continue
        # Drop the ready strobe after one cycle.
        await RisingEdge(dut.i_clk)
        dut.i_cache_rdy.value = 0
@cocotb.test
async def sanity_test(dut):
    # Writes and reads two bytes in each of two cache lines (0x100/0x101 and
    # 0x200/0x201) in several interleavings, then compares the DUT's miss and
    # eviction counters with the expected totals. Counter mismatches are
    # logged with logger.error; they do not raise.
    def wr(addr, value):
        return (addr, value, True, False)

    def rd(addr):
        return (addr, 0x00, False, False)

    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    stimulus = [
        wr(0x100, 0xaa), wr(0x101, 0xbb),
        rd(0x100), rd(0x101),
        wr(0x200, 0xcc), wr(0x201, 0xdd),
        rd(0x100), rd(0x101), rd(0x200), rd(0x201),
        wr(0x100, 0x11), wr(0x101, 0x22),
        rd(0x100), wr(0x200, 0x33), rd(0x101), wr(0x201, 0x44),
        rd(0x100), rd(0x200), rd(0x101), rd(0x201),
    ]

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    expected_cache_misses, expected_evictions = 2, 0
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def clean_evict_test(dut):
    # Reads one cache line, then reads a second address (0x1100) that maps to
    # the same line index as the first (0x100), with no write in between.
    # The expectation is two misses and no eviction: the clean line is simply
    # overwritten. Counter mismatches are logged, not raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    stimulus = [(addr, 0x00, False, False) for addr in (0x100, 0x1100)]

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    expected_cache_misses, expected_evictions = 2, 0
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def dirty_evict_test(dut):
    # Writes two bytes into the line at 0x100, then reads and writes 0x1100
    # (same line index as 0x100), and finally reads 0x100 again. Each of the
    # two replacements displaces a line that was written, so the expectation
    # is three misses and two evictions. Counter mismatches are logged, not
    # raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    def wr(addr, value):
        return (addr, value, True, False)

    def rd(addr):
        return (addr, 0x00, False, False)

    stimulus = [
        wr(0x100, 0x41),
        wr(0x101, 0x42),
        rd(0x1100),
        wr(0x1100, 0xaa),
        rd(0x100),
    ]

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    expected_cache_misses, expected_evictions = 3, 2
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def long_write_thrash_test(dut):
    # Writes one byte to the start of every 64-byte line in a 1 MiB range.
    # Every access is expected to miss, and every line except the last 64 is
    # expected to be evicted. Counter mismatches are logged, not raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    num_lines_read = 2**20 // 64
    stimulus = []
    for line in range(num_lines_read):
        stimulus.append((line * 64, line % 256, True, False))

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    # The last 64 lines aren't evicted
    expected_cache_misses, expected_evictions = num_lines_read, num_lines_read - 64
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def long_write_read_thrash_test(dut):
    # Writes one byte to the start of every 64-byte line in a 1 MiB range,
    # then reads the same addresses back in the same order. The expectation
    # is a miss on every access and one eviction per written line. Counter
    # mismatches are logged, not raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    num_lines_read = 2**20 // 64
    write_pass = [(line * 64, line % 256, True, False) for line in range(num_lines_read)]
    read_pass = [(line * 64, 0, False, False) for line in range(num_lines_read)]
    stimulus = write_pass + read_pass

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    expected_cache_misses, expected_evictions = num_lines_read * 2, num_lines_read
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def long_write_linear_test(dut):
    # Writes every byte address in a 64 KiB range in ascending order. The
    # expectation is one miss per 64-byte line, and an eviction for every
    # line except the last 64. Counter mismatches are logged, not raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    num_bytes_read = 2**16
    stimulus = []
    for byte_address in range(num_bytes_read):
        stimulus.append((byte_address, byte_address % 256, True, False))

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    lines_touched = num_bytes_read // 64
    expected_cache_misses, expected_evictions = lines_touched, lines_touched - 64
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def long_write_read_linear_test(dut):
    # Writes every byte address in a 64 KiB range in ascending order, then
    # reads the same range back. The expectation is one miss per line in each
    # pass and one eviction per line overall. Counter mismatches are logged,
    # not raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    num_bytes_read = 2**16
    write_pass = [(byte_address, byte_address % 256, True, False) for byte_address in range(num_bytes_read)]
    read_pass = [(byte_address, 0, False, False) for byte_address in range(num_bytes_read)]
    stimulus = write_pass + read_pass

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    lines_touched = num_bytes_read // 64
    expected_cache_misses, expected_evictions = lines_touched * 2, lines_touched
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def short_write_read_linear_test(dut):
    # "Short" means the test touches only 64 cache lines (64 bytes each), so
    # no eviction is expected: one miss per line and an eviction count of
    # zero. The range is written byte by byte and then read back. Counter
    # mismatches are logged, not raised.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background_task in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background_task(dut))

    num_bytes_read = 64 * 64  # 64 bytes times 64 cachelines
    write_pass = [(byte_address, byte_address % 256, True, False) for byte_address in range(num_bytes_read)]
    # The read pass leaves the same pattern on i_data; only i_we differs.
    read_pass = [(byte_address, byte_address % 256, False, False) for byte_address in range(num_bytes_read)]
    stimulus = write_pass + read_pass

    # Hold reset for ten clock edges before releasing it.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, stimulus)

    lines_touched = num_bytes_read // 64
    expected_cache_misses, expected_evictions = lines_touched, lines_touched - 64
    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)
    if expected_evictions != dut_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")
    if expected_cache_misses != dut_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
@cocotb.test
async def random_access_test(dut):
    # Fully random accesses: random address, data byte, write-enable and sync
    # flag for every request. This is also a thrash test, since such a
    # pattern is not realistic. No counters are checked here; the data
    # monitor started below still compares read data.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    cocotb.start_soon(mmu_sequencer(dut))
    cocotb.start_soon(handle_higher_level_cache(dut))
    cocotb.start_soon(cpu_data_monitor(dut))
    num_requests = 2**18
    # random.randint() includes both end points, so the upper bound must be
    # 2**32 - 1 to stay inside a 32-bit address.
    # NOTE(review): assumes i_addr is 32 bits wide -- confirm against the DUT.
    max_address = 2**32 - 1
    cpu_sequence = [
        (
            random.randint(0, max_address),
            random.randint(0, 255),
            random.randint(0, 1),
            random.randint(0, 1),
        )
        for _ in range(num_requests)
    ]
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await cpu_sequencer(dut, cpu_sequence)

View File

@@ -0,0 +1,402 @@
import cocotb
from cocotb.handle import LogicArray, Array, Immediate
from cocotb.clock import Clock
from cocotb.triggers import ReadOnly, NextTimeStep, RisingEdge, Timer
import logging
import random
from enum import IntEnum
# Root logger for this testbench; INFO suppresses the logger.debug() traces.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Clock period passed to cocotb's Clock(); the tests pair it with unit="ns".
CLK_PERIOD = 5
# Geometry used by the tests when preloading lines.
# NOTE(review): presumably mirrors the DUT's NUM_SETS / NUM_WAYS / tag width
# -- confirm against the module parameters.
SETS = 64
WAYS = 4
TAG_WIDTH = 20
# One dict per way, each mapping set index -> line data.
data_arrays = [{}, {}, {}, {}]
# One dict per way, each mapping set index -> packed metadata word.
meta_arrays = [{}, {}, {}, {}]
# Maps set index -> LRU word written by the DUT.
lru_array = {}
class MesiState(IntEnum):
    """Coherence state stored in the upper bits of a metadata word."""

    # Plain integer values. A trailing comma after a value would turn it into
    # a one-element tuple, which only resolves to the same int because Enum
    # unpacks tuple values into the mixed-in type's constructor.
    MESI_INVALID = 0
    MESI_SHARED = 1
    MESI_EXCLUSIVE = 2
    MESI_MODIFIED = 3
def write_cacheline(index: int, way: int, data: bytes, mesi_state: MesiState, tag: int) -> None:
    """Store one line and its metadata word in the Python array models.

    The metadata word is ``mesi_state`` shifted above the tag field
    (``TAG_WIDTH`` bits) OR-ed with ``tag``.  ``tag`` is not masked, so a
    value wider than ``TAG_WIDTH`` bits would spill into the state bits.
    """
    data_arrays[way][index] = data
    meta_arrays[way][index] = (mesi_state << TAG_WIDTH) | tag
async def handle_cache_arrays(dut):
    """Model the data and metadata arrays behind the DUT's array ports.

    After every rising clock edge:
      * if ``o_write_valid`` is non-zero, ``o_write_data`` / ``o_write_meta``
        are stored at ``o_write_index`` in every way whose bit is set in
        ``o_write_valid``;
      * if ``o_read_valid`` is set, the data and metadata of every way at
        ``o_read_index`` are driven onto ``i_read_data`` / ``i_read_meta``.

    A read of an index that was never written raises ``KeyError``.
    """
    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_write_valid.value:
            index = int(dut.o_write_index.value)
            # o_write_valid is a per-way mask; bit N enables way N.
            write_mask = int(dut.o_write_valid.value)
            write_enables = [bool(write_mask & (1 << way)) for way in range(WAYS)]
            write_data = dut.o_write_data.value.to_bytes(byteorder="little")
            write_meta = int(dut.o_write_meta.value)
            logger.debug(f"Write Valid: {index=} {write_enables=} {write_data=} {write_meta=:#x}")
            for data_array, meta_array, write_enable in zip(data_arrays, meta_arrays, write_enables):
                if write_enable:
                    data_array[index] = write_data
                    meta_array[index] = write_meta
        if dut.o_read_valid.value:
            index = int(dut.o_read_index.value)
            logger.debug(f"Read Valid: {index=}")
            read_data = [LogicArray.from_bytes(data[index], byteorder="little") for data in data_arrays]
            read_meta = [meta[index] for meta in meta_arrays]
            dut.i_read_data.value = read_data
            dut.i_read_meta.value = read_meta
async def handle_lru_arrays(dut):
    """Model the LRU array: service the DUT's LRU write and read ports.

    After every rising clock edge a valid write is stored in ``lru_array``
    first, then a valid read drives ``i_lru_read_data`` from it.
    """
    while True:
        await RisingEdge(dut.i_clk)

        if dut.o_lru_write_valid.value:
            logger.debug("lru write")
            target_set = int(dut.o_lru_write_index.value)
            new_bits = int(dut.o_lru_write_data.value)
            lru_array[target_set] = new_bits

        if not dut.o_lru_read_valid.value:
            continue
        logger.debug("lru read")
        source_set = int(dut.o_lru_read_index.value)
        dut.i_lru_read_data.value = lru_array[source_set]
async def handle_writeback(dut):
    """Acknowledge writeback requests.

    When ``o_writeback_valid`` is seen on a rising edge, wait two more edges,
    pulse ``i_writeback_done`` for one cycle, then look for the next request.
    """
    dut.i_writeback_done.value = 0
    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_writeback_valid.value:
            logger.info("Writeback valid")
            # Fixed two-cycle delay before the acknowledge.
            for _delay in range(2):
                await RisingEdge(dut.i_clk)
            dut.i_writeback_done.value = 1
            await RisingEdge(dut.i_clk)
            dut.i_writeback_done.value = 0
async def handle_bus_interface(dut):
    """Acknowledge memory requests.

    When ``o_memory_valid`` is seen on a rising edge, wait two more edges,
    then drive ``i_memory_done`` = 1 and ``i_memory_resp`` = 2 for one cycle.
    No data is returned on this path.
    """

    def respond(done, resp):
        # Drive both response signals together.
        dut.i_memory_done.value = done
        dut.i_memory_resp.value = resp

    respond(0, 0)
    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_memory_valid.value:
            logger.debug("Bus Interface Access")
            # Fixed two-cycle delay before the response.
            for _delay in range(2):
                await RisingEdge(dut.i_clk)
            respond(1, 2)
            await RisingEdge(dut.i_clk)
            respond(0, 0)
@cocotb.test
async def test_sanity(dut):
    # Preload every way of every set with a zeroed EXCLUSIVE line (tag 0),
    # then issue one read with tag 0 and offset 0 to each of the first 32
    # set indices. Nothing is checked; the test only exercises the read path.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    cocotb.start_soon(handle_cache_arrays(dut))
    dut.i_cpu_we.value = 0
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)
    zero_line = bytes(64)
    for way in range(WAYS):
        for index in range(SETS):
            write_cacheline(index, way, zero_line, MesiState.MESI_EXCLUSIVE, 0)
    for i in range(32):
        # Wait for the DUT to be ready before issuing the next request. A
        # bare `continue` here would skip the request without letting any
        # simulation time pass, so every remaining index would be dropped.
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)
        dut.i_cpu_tag.value = 0
        dut.i_cpu_index.value = i
        dut.i_cpu_offset.value = 0
        dut.i_rdy.value = 1
        dut.i_cpu_we.value = 0
        await RisingEdge(dut.i_clk)
@cocotb.test
async def test_clean_eviction(dut):
    # Fills all ways of one set with SHARED lines, then reads that set with a
    # tag that matches none of them. Nothing is checked by the test itself;
    # it runs for 1 us after the request so the result can be inspected.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0
    # Hold reset for ten clock edges, then wait for the DUT to become ready.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 2
    # Way N holds a SHARED line with tag N + 1.
    resident_line = bytes([0xaa] * 64)
    for way in range(WAYS):
        write_cacheline(INDEX, way, resident_line, MesiState.MESI_SHARED, way + 1)

    # Index and offset go out first; the tag under test (0xaa) follows one
    # cycle later and is cleared again after one more cycle.
    dut.i_cpu_tag.value = 0x0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 0
    for tag_value in (0xaa, 0):
        await RisingEdge(dut.i_clk)
        dut.i_cpu_tag.value = tag_value
    await Timer(1, "us")
@cocotb.test
async def test_eviction(dut):
    # Fills all ways of one set with MODIFIED lines, then reads that set with
    # a tag that matches none of them. Nothing is checked by the test itself;
    # it runs for 1 us after the request so the result can be inspected.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0
    # Hold reset for ten clock edges, then wait for the DUT to become ready.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 2
    # Way N holds a MODIFIED line with tag N + 1.
    resident_line = bytes([0xaa] * 64)
    for way in range(WAYS):
        write_cacheline(INDEX, way, resident_line, MesiState.MESI_MODIFIED, way + 1)

    # Index and offset go out first; the tag under test (0xaa) follows one
    # cycle later and is cleared again after one more cycle.
    dut.i_cpu_tag.value = 0x0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 0
    for tag_value in (0xaa, 0):
        await RisingEdge(dut.i_clk)
        dut.i_cpu_tag.value = tag_value
    await Timer(1, "us")
@cocotb.test
async def test_request_ownership(dut):
    # Fills all ways of one set with SHARED lines, then writes to the line
    # whose tag is 2 (held in way 1). Nothing is checked by the test itself;
    # it runs for 1 us after the request so the result can be inspected.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0
    # Hold reset for ten clock edges, then wait for the DUT to become ready.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 2
    # Way N holds a SHARED line with tag N + 1.
    resident_line = bytes([0xaa] * 64)
    for way in range(WAYS):
        write_cacheline(INDEX, way, resident_line, MesiState.MESI_SHARED, way + 1)

    # Write request: index, offset and data first, tag 2 one cycle later.
    dut.i_cpu_tag.value = 0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_cpu_data.value = 0xaa
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 1
    await RisingEdge(dut.i_clk)

    dut.i_cpu_data.value = 0
    dut.i_cpu_tag.value = 2
    await RisingEdge(dut.i_clk)

    dut.i_cpu_tag.value = 0
    await Timer(1, "us")
@cocotb.test
async def test_way_read_thrash(dut):
    # Reads set 0 with 32 different tags in a row, waiting for o_rdy after
    # each request. Nothing is checked by the test itself; it runs for 1 us
    # after the last request.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0
    # Hold reset for ten clock edges, then wait for the DUT to become ready.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    async def wait_until_ready():
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

    for tag in range(32):
        dut.i_cpu_tag.value = tag
        dut.i_cpu_index.value = 0
        dut.i_cpu_offset.value = 0
        dut.i_rdy.value = 1
        await RisingEdge(dut.i_clk)
        await wait_until_ready()
    await Timer(1, "us")
@cocotb.test
async def test_write_waw(dut):
    # Write-after-write: two back-to-back writes to the same line (same index
    # and tag) at offsets 1 and 2. Nothing is checked by the test itself; it
    # runs for 1 us after the second write.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    # Hold reset for ten clock edges, then wait for the DUT to become ready.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 7
    TAG = 0xabcd

    async def present_tag_and_wait():
        # The tag follows the index/offset by one cycle; then stall until
        # the DUT is ready again.
        await RisingEdge(dut.i_clk)
        dut.i_cpu_tag.value = TAG
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

    # First write: 0xaa at offset 1. The tag input carries an unused tag
    # (0xffff) during the index cycle.
    dut.i_cpu_tag.value = 0xffff
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 1
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 1
    dut.i_cpu_data.value = 0xaa
    await present_tag_and_wait()

    # Second write: 0x55 at offset 2 of the same line.
    dut.i_cpu_we.value = 1
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_cpu_data.value = 0x55
    await present_tag_and_wait()

    dut.i_cpu_we.value = 0
    await Timer(1, "us")
@cocotb.test
async def test_write_raw(dut):
    # Read-after-write: writes 0x41 to offset 1 of a line, then immediately
    # reads the same index, tag and offset. Nothing is checked by the test
    # itself; it runs for 1 us after the read.
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    # Hold reset for ten clock edges, then wait for the DUT to become ready.
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0
    await RisingEdge(dut.o_rdy)

    INDEX = 7
    TAG = 0xabcd

    async def present_tag_and_wait():
        # The tag follows the index/offset by one cycle; then stall until
        # the DUT is ready again.
        await RisingEdge(dut.i_clk)
        dut.i_cpu_tag.value = TAG
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

    # Write 0x41 at offset 1. The tag input carries an unused tag (0xffff)
    # during the index cycle.
    dut.i_cpu_tag.value = 0xffff
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 1
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 1
    dut.i_cpu_data.value = 0x41
    await present_tag_and_wait()

    # Read the same byte back.
    dut.i_cpu_we.value = 0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 1
    await present_tag_and_wait()

    dut.i_cpu_we.value = 0
    await Timer(1, "us")

View File

@@ -0,0 +1,34 @@
import cocotb
from cocotb.handle import Immediate
from cocotb.clock import Clock
from cocotb.triggers import Timer, RisingEdge
import logging
import random
logger = logging.getLogger()
logger.setLevel(logging.INFO)
CLK_PERIOD = 5
@cocotb.test
async def test_sanity(dut):
    # Smoke test for the top level: start the clock, apply reset for ten
    # clock edges, wait for o_cpu_rdy to rise and then let the simulation run
    # for another 10 us. No request is issued and nothing is checked.
    clock = Clock(dut.i_clk, CLK_PERIOD, unit="ns")
    cocotb.start_soon(clock.start())

    dut.i_cpu_we.value = 0
    dut.i_rst.value = Immediate(1)
    for _cycle in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await RisingEdge(dut.o_cpu_rdy)
    await Timer(10, "us")

View File

@@ -4,4 +4,18 @@ tests:
modules: modules:
- "application_wrapper_cache_arrays_test" - "application_wrapper_cache_arrays_test"
sources: "sources.list" sources: "sources.list"
waves: True
- name: "application_wrapper_cache_miss_handler_test"
toplevel: "application_wrapper_cache_miss_handler"
modules:
- "application_wrapper_cache_miss_handler_test"
sources: "sources.list"
waves: True
- name: "application_wrapper_cache_top_test"
toplevel: "application_wrapper_cache_top"
modules:
- "application_wrapper_cache_top_test"
sources: "sources.list"
waves: True waves: True

View File

@@ -0,0 +1,74 @@
import application_wrapper_cache_pkg::*;
// Bus interface between the cache (memory request, snoop and writeback
// ports) and a CHI-style flit interface.
// This is a port-list-only stub: the module body is empty, so every output
// is currently undriven.
module application_wrapper_cache_bus_interface #(
// Width of one cache line in bits (64 bytes).
parameter DATA_W = 64*8,
// Flit widths. These are all work in progress (placeholder values).
localparam REQ_W = 32,
localparam RSP_W = 32,
localparam DAT_W = 512+64,
localparam SNP_W = 32
) (
input logic i_clk,
input logic i_rst,
// Memory request port: address, command and valid in; data, done and
// response code out.
input logic [31:0] i_cpu_memory_addr,
input logic i_cpu_memory_valid,
input cache_cmd_e i_cpu_memory_cmd,
output logic [DATA_W-1:0] o_cpu_memory_data,
output logic o_cpu_memory_done,
output cache_resp_e o_cpu_memory_resp,
// Snoop port towards the cache.
output logic [31:0] o_snoop_addr,
output snoop_cmd_e o_snoop_cmd,
output logic o_snoop_valid,
// Writeback port: a full line plus its address in, done strobe out.
input logic [31:0] i_writeback_addr,
input logic [DATA_W-1:0] i_writeback_data,
input logic i_writeback_valid,
output logic o_writeback_done,
// CHI Interface
output logic o_txsactive,
// NOTE(review): declared as an input but named with the o_ prefix;
// presumably meant to be i_rxsactive -- confirm before wiring it up.
input logic o_rxsactive,
output logic o_txlinkactivereq,
input logic i_txlinkactiveack,
// TX request channel
output logic o_txreqflitpend,
output logic o_txreqflitv,
output logic [REQ_W-1:0] o_txreqflit,
input logic i_txreqlcrdv,
// TX response channel
output logic o_txrspflitpend,
output logic o_txrspflitv,
output logic [RSP_W-1:0] o_txrspflit,
input logic i_txrsplcrdv,
// TX data channel
output logic o_txdatflitpend,
output logic o_txdatflitv,
output logic [DAT_W-1:0] o_txdatflit,
input logic i_txdatlcrdv,
input logic i_rxlinkactivereq,
output logic o_rxlinkactiveack,
// RX response channel
input logic i_rxrspflitpend,
input logic i_rxrspflitv,
input logic [RSP_W-1:0] i_rxrspflit,
// NOTE(review): declared as an output but named with the i_ prefix;
// the other credit returns are o_rx*lcrdv -- presumably o_rxrsplcrdv.
output logic i_rxrsplcrdv,
// RX data channel
input logic i_rxdatflitpend,
input logic i_rxdatflitv,
input logic [DAT_W-1:0] i_rxdatflit,
output logic o_rxdatlcrdv,
// RX snoop channel
input logic i_rxsnpflitpend,
input logic i_rxsnpflitv,
input logic [SNP_W-1:0] i_rxsnpflit,
output logic o_rxsnplcrdv
);
endmodule

View File

@@ -0,0 +1,31 @@
// LRU state storage: one LRU_W-bit word per set, with one synchronous write
// port and one synchronous read port.
//
// Read data appears on o_read_data the cycle after i_read_valid and holds
// its value while i_read_valid is low. When a read and a write hit the same
// index in the same cycle, the read returns the data being written.
module application_wrapper_cache_lru #(
    // This should be NUM_WAYS - 1
    parameter LRU_W = 3,
    parameter NUM_SETS = 64,
    localparam INDEX_W = $clog2(NUM_SETS)
) (
    input logic i_clk,

    input logic [INDEX_W-1:0] i_read_index,
    input logic i_read_valid,
    output logic [LRU_W-1:0] o_read_data,

    input logic [INDEX_W-1:0] i_write_index,
    input logic i_write_valid,
    input logic [LRU_W-1:0] i_write_data
);

    logic [LRU_W-1:0] lru_array [NUM_SETS];

    // Non-blocking assignments keep this block race-free against other
    // processes that sample o_read_data on the same clock edge.
    always_ff @(posedge i_clk) begin
        if (i_write_valid) begin
            lru_array[i_write_index] <= i_write_data;
        end

        if (i_read_valid) begin
            // Explicit bypass: with non-blocking writes the array still holds
            // the old word here, so forward the incoming data to keep the
            // write-first result for a same-index read.
            if (i_write_valid && (i_write_index == i_read_index)) begin
                o_read_data <= i_write_data;
            end else begin
                o_read_data <= lru_array[i_read_index];
            end
        end
    end

endmodule

View File

@@ -0,0 +1,477 @@
import application_wrapper_cache_pkg::*;
module application_wrapper_cache_miss_handler #(
parameter NUM_WAYS = 4,
parameter NUM_SETS = 64,
localparam CPU_W = 8,
localparam DATA_W = 64*8,
localparam OFFSET_W = 6,
localparam INDEX_W = $clog2(NUM_SETS),
localparam TAG_W = 32 - INDEX_W - OFFSET_W,
localparam LRU_W = NUM_WAYS-1,
localparam META_W = TAG_W + 2
) (
input logic i_clk,
input logic i_rst,
// NOTE: tag is physical tag, expected 1 cycle after the index and the offset
input logic [TAG_W-1:0] i_cpu_tag,
input logic [INDEX_W-1:0] i_cpu_index,
input logic [OFFSET_W-1:0] i_cpu_offset,
input logic i_rdy,
output logic o_rdy,
input logic i_cpu_we,
input logic [CPU_W-1:0] i_cpu_data,
output logic [CPU_W-1:0] o_cpu_data,
output logic [INDEX_W-1:0] o_read_index,
output logic o_read_valid,
input logic [DATA_W-1:0] i_read_data [NUM_WAYS],
input logic [META_W-1:0] i_read_meta [NUM_WAYS],
output logic [INDEX_W-1:0] o_write_index,
output logic [NUM_WAYS-1:0] o_write_valid,
output logic [DATA_W-1:0] o_write_data,
output logic [META_W-1:0] o_write_meta,
output logic [INDEX_W-1:0] o_lru_read_index,
output logic o_lru_read_valid,
input logic [LRU_W-1:0] i_lru_read_data,
output logic [INDEX_W-1:0] o_lru_write_index,
output logic o_lru_write_valid,
output logic [LRU_W-1:0] o_lru_write_data,
output logic [DATA_W-1:0] o_writeback_data,
output logic [31:0] o_writeback_addr,
output logic o_writeback_valid,
input logic i_writeback_done,
output logic [31:0] o_memory_addr,
output logic o_memory_valid,
output cache_cmd_e o_memory_cmd,
input logic [DATA_W-1:0] i_memory_data,
input logic i_memory_done,
input cache_resp_e i_memory_resp
);
enum logic [3:0] {
RESET,
CLEAR_MEMORY,
IDLE,
CHECK_VICTIM,
WRITEBACK,
WAIT_WRITEBACK_ACK,
REQUEST_MEMORY,
WAIT_MEMORY,
REQUEST_OWNERSHIP
} state, state_next;
logic [INDEX_W-1:0] clear_index, clear_index_next;
logic cpu_we_d1;
logic [CPU_W-1:0] cpu_i_data_d1;
logic [TAG_W-1:0] cpu_tag_d1;
logic [INDEX_W-1:0] cpu_index_d1, cpu_index_d2;
logic [OFFSET_W-1:0] cpu_offset_d1, cpu_offset_d2;
logic [TAG_W-1:0] cpu_tag_new, cpu_tag_new_next;
logic [INDEX_W-1:0] cpu_index_new, cpu_index_new_next;
logic [OFFSET_W-1:0] cpu_offset_new, cpu_offset_new_next;
logic [$clog2(NUM_WAYS)-1:0] cpu_way_new, cpu_way_new_next;
logic [7:0] cpu_data_new, cpu_data_new_next;
logic cpu_we_new, cpu_we_new_next;
logic previous_was_valid, previous_was_valid_next;
logic way_match_found;
logic [NUM_WAYS-1:0] way_select_mask;
logic [$clog2(NUM_WAYS)-1:0] way_select_idx;
mesi_e mesi;
logic [TAG_W-1:0] tag;
logic [31:0] read_req_addr, read_req_addr_next;
always_ff @(posedge i_clk) begin
if (i_rst) begin
state <= RESET;
end else begin
state <= state_next;
end
previous_was_valid <= previous_was_valid_next;
read_req_addr <= read_req_addr_next;
cpu_offset_new <= cpu_offset_new_next;
cpu_index_new <= cpu_index_new_next;
cpu_tag_new <= cpu_tag_new_next;
cpu_way_new <= cpu_way_new_next;
cpu_data_new <= cpu_data_new_next;
cpu_we_new <= cpu_we_new_next;
clear_index <= clear_index_next;
cpu_we_d1 <= i_cpu_we;
cpu_i_data_d1 <= i_cpu_data;
cpu_index_d1 <= i_cpu_index;
cpu_index_d2 <= cpu_index_d1;
cpu_tag_d1 <= i_cpu_tag;
cpu_offset_d1 <= i_cpu_offset;
cpu_offset_d2 <= cpu_offset_d1;
end
always_comb begin
o_rdy = '0;
o_cpu_data = '0;
o_read_valid = '0;
o_read_index = '0;
o_write_valid = '0;
o_write_index = '0;
o_write_data = '0;
o_write_meta = '0;
o_lru_read_valid = '0;
o_lru_read_index = '0;
o_lru_write_valid = '0;
o_lru_write_index = '0;
o_lru_write_data = '0;
o_writeback_data = '0;
o_writeback_addr = '0;
o_writeback_valid = '0;
o_memory_addr = '0;
o_memory_valid = '0;
o_memory_cmd = CACHE_CMD_NONE;
way_match_found = '0;
way_select_mask = '0;
way_select_idx = '0;
mesi = MESI_INVALID;
tag = '0;
cpu_offset_new_next = cpu_offset_new;
cpu_index_new_next = cpu_index_new;
cpu_tag_new_next = cpu_tag_new;
cpu_way_new_next = cpu_way_new;
cpu_data_new_next = cpu_data_new;
cpu_we_new_next = cpu_we_new;
read_req_addr_next = read_req_addr;
clear_index_next = clear_index;
previous_was_valid_next = previous_was_valid;
state_next = state;
case (state)
RESET: begin
state_next = CLEAR_MEMORY;
clear_index_next = '0;
previous_was_valid_next = '0;
end
CLEAR_MEMORY: begin
o_write_valid = '1;
o_write_data = '0;
o_write_meta = {MESI_INVALID, (TAG_W)'('0)};
o_write_index = clear_index;
o_lru_write_index = clear_index;
o_lru_write_data = '0;
o_lru_write_valid = '1;
clear_index_next = clear_index + 1;
if (clear_index_next == '0) begin
state_next = IDLE;
end
end
IDLE: begin
// by default, o_rdy is 1 unless something is wrong
o_rdy = '1;
if (previous_was_valid) begin
// data from previous cycle that was read from arrays
way_match_found = '0;
way_select_mask = '0;
for (int i = 0; i < NUM_WAYS; i++) begin
{mesi, tag} = i_read_meta[i];
if (tag == i_cpu_tag && mesi != MESI_INVALID) begin
way_match_found = '1;
way_select_mask[i] = '1;
way_select_idx = 2'(i);
break;
end
end
// We have a match, so either read or write data
if (way_match_found) begin
if (cpu_we_d1) begin
// write data back to the cache array
// check if we are in the M or E states before we write.
// If we are in S then we need to request ownership before
// we can modify it.
if (mesi == MESI_MODIFIED || mesi == MESI_EXCLUSIVE) begin
o_write_data = i_read_data[way_select_idx];
o_write_data[cpu_offset_d1*8 +: CPU_W] = cpu_i_data_d1;
o_write_meta = {MESI_MODIFIED, i_cpu_tag};
o_write_valid = way_select_mask;
o_write_index = cpu_index_d1;
end else begin
o_rdy = '0;
o_memory_addr = {i_cpu_tag, cpu_index_d1, (OFFSET_W)'('0)};
o_memory_cmd = CACHE_CMD_CLEAN_UNIQUE;
o_memory_valid = '1;
cpu_offset_new_next = cpu_offset_d1;
cpu_index_new_next = cpu_index_d1;
cpu_tag_new_next = i_cpu_tag;
cpu_way_new_next = way_select_idx;
cpu_data_new_next = cpu_i_data_d1;
state_next = REQUEST_OWNERSHIP;
end
end else begin
// Send the data to the CPU
o_cpu_data = i_read_data[way_select_idx][cpu_offset_d1*8 +: CPU_W];
end
// update lru
// start by copying the read data, then change the bits
// based on what we matched.
o_lru_write_index = cpu_index_d1;
o_lru_write_data = i_lru_read_data;
o_lru_write_valid = '1;
case (way_select_mask)
4'b0001: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '1;
end
4'b0010: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '0;
end
4'b0100: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '1;
end
4'b1000: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '0;
end
default: begin
end
endcase
end else begin
o_rdy = '0;
state_next = CHECK_VICTIM;
cpu_data_new_next = cpu_i_data_d1;
cpu_we_new_next = cpu_we_d1;
end
end
// Read from arrays
o_read_index = i_cpu_index;
o_read_valid = i_rdy & o_rdy;
o_lru_read_index = i_cpu_index;
o_lru_read_valid = i_rdy & o_rdy;
previous_was_valid_next = '1;
end
REQUEST_OWNERSHIP: begin
if (i_memory_done) begin
// write to the cacheline here.
o_write_data = i_read_data[cpu_way_new];
o_write_data[cpu_offset_new*8 +: CPU_W] = cpu_data_new;
o_write_meta = {MESI_MODIFIED, cpu_tag_new};
o_write_valid = (1 << cpu_way_new);
o_write_index = cpu_index_new;
state_next = IDLE;
// update lru
// start by copying the read data, then change the bits
// based on what we matched.
o_lru_write_index = cpu_index_new;
o_lru_write_data = i_lru_read_data;
o_lru_write_valid = '1;
case (1 << cpu_way_new)
4'b0001: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '1;
end
4'b0010: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '0;
end
4'b0100: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '1;
end
4'b1000: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '0;
end
default: begin
end
endcase
end
end
CHECK_VICTIM: begin
// first use the LRU, then overwrite if there was an invalid way
way_select_idx[0] = i_lru_read_data[0];
way_select_idx[1] = way_select_idx[0] ? i_lru_read_data[2] : i_lru_read_data[1];
for (int i = 0; i < NUM_WAYS; i++) begin
{mesi, tag} = i_read_meta[i];
if (mesi == MESI_INVALID) begin
way_select_idx = 2'(i);
break;
end
end
{mesi, tag} = i_read_meta[way_select_idx];
if (mesi == MESI_MODIFIED) begin
o_writeback_data = i_read_data[way_select_idx];
o_writeback_addr = {tag, cpu_index_d2, (OFFSET_W)'('0)};
o_writeback_valid = '1;
state_next = WAIT_WRITEBACK_ACK;
end else if (mesi == MESI_EXCLUSIVE || mesi == MESI_SHARED) begin
o_memory_addr = {tag, cpu_index_d2, (OFFSET_W)'('0)};
o_memory_valid = '1;
o_memory_cmd = CACHE_CMD_EVICT;
state_next = WAIT_WRITEBACK_ACK;
end else begin
state_next = REQUEST_MEMORY;
end
read_req_addr_next = {cpu_tag_d1, cpu_index_d2, (OFFSET_W)'('0)};
cpu_offset_new_next = cpu_offset_d2;
cpu_index_new_next = cpu_index_d2;
cpu_tag_new_next = cpu_tag_d1;
cpu_way_new_next = way_select_idx;
end
WAIT_WRITEBACK_ACK: begin
// This state is also used when sending the EVICT command,
// before sending the read.
if (i_writeback_done || i_memory_done) begin
state_next = REQUEST_MEMORY;
end
end
REQUEST_MEMORY: begin
// This state can be put into WAIT_WRITEBACK_ACK and CHECK_VICTIM
o_memory_addr = read_req_addr;
o_memory_valid = '1;
if (cpu_we_new) begin
o_memory_cmd = CACHE_CMD_READ_UNIQUE;
end else begin
o_memory_cmd = CACHE_CMD_READ;
end
state_next = WAIT_MEMORY;
end
WAIT_MEMORY: begin
if (i_memory_done) begin
o_write_valid = (1 << cpu_way_new);
o_write_data = i_memory_data;
o_write_index = cpu_index_new;
if (cpu_we_new) begin
o_write_data[cpu_offset_new*8 +: CPU_W] = cpu_data_new;
o_write_meta = {MESI_MODIFIED, cpu_tag_new};
end else begin
if (i_memory_resp == CACHE_RSP_SHARED) begin
o_write_meta = {MESI_SHARED, cpu_tag_new};
end else if (i_memory_resp == CACHE_RSP_EXCLUSIVE) begin
o_write_meta = {MESI_EXCLUSIVE, cpu_tag_new};
end
o_cpu_data = i_memory_data[cpu_offset_new*8 +: CPU_W];
end
o_rdy = '1;
// update lru
// start by copying the read data, then change the bits
// based on what we matched.
o_lru_write_index = cpu_index_new;
o_lru_write_data = i_lru_read_data;
o_lru_write_valid = '1;
case (1 << cpu_way_new)
4'b0001: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '1;
end
4'b0010: begin
o_lru_write_data[0] = '1;
o_lru_write_data[1] = '0;
end
4'b0100: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '1;
end
4'b1000: begin
o_lru_write_data[0] = '0;
o_lru_write_data[2] = '0;
end
default: begin
end
endcase
o_read_index = i_cpu_index;
o_read_valid = i_rdy & o_rdy;
o_lru_read_index = i_cpu_index;
o_lru_read_valid = i_rdy & o_rdy;
state_next = IDLE;
end
end
default: begin
state_next = IDLE;
end
endcase
end
endmodule

View File

@@ -11,18 +11,31 @@ package application_wrapper_cache_pkg;
} page_table_entry_t; } page_table_entry_t;
typedef enum logic [2:0] { typedef enum logic [2:0] {
CACHE_NONE, CACHE_CMD_NONE,
CACHE_READ_SHARED, CACHE_CMD_READ,
CACHE_READ_UNIQUE, CACHE_CMD_READ_UNIQUE,
CACHE_WRITE, CACHE_CMD_WRITE,
CACHE_CLEAN_UNIQUE CACHE_CMD_CLEAN_UNIQUE,
CACHE_CMD_EVICT
} cache_cmd_e; } cache_cmd_e;
typedef enum logic [1:0] { typedef enum logic [1:0] {
MODIFIED, CACHE_RSP_NONE,
EXCLUSIVE, CACHE_RSP_SHARED,
SHARED, CACHE_RSP_EXCLUSIVE
INVALID } cache_resp_e;
typedef enum logic [1:0] {
MESI_INVALID,
MESI_SHARED,
MESI_EXCLUSIVE,
MESI_MODIFIED
} mesi_e; } mesi_e;
typedef enum logic [1:0] {
CACHE_SNP_NONE,
CACHE_SNP_INVALIDATE,
CACHE_SNP_SHARE
} snoop_cmd_e;
endpackage endpackage

View File

@@ -0,0 +1,40 @@
import application_wrapper_cache_pkg::*;
// Snoop-handling unit of the cache.
//
// Currently a stub: only the parameter and port lists are declared and the
// module body is empty, so none of the outputs below has a driver yet.
module application_wrapper_cache_snooping #(
// Cache geometry (overridable).
parameter NUM_WAYS = 4,
parameter NUM_SETS = 64,
// Derived widths (localparams, not overridable).
// NOTE(review): CPU_W and LRU_W are not referenced by any port of this module.
localparam CPU_W = 8,
// One cacheline: 64 bytes expressed in bits.
localparam DATA_W = 64*8,
// Byte offset within a cacheline (2**6 = 64 bytes).
localparam OFFSET_W = 6,
localparam INDEX_W = $clog2(NUM_SETS),
// Tag is whatever remains of a 32-bit address after index and offset.
localparam TAG_W = 32 - INDEX_W - OFFSET_W,
localparam LRU_W = NUM_WAYS-1,
// Per-way metadata word: tag plus 2 extra bits (presumably the 2-bit mesi_e
// state, matching how the miss handler packs {mesi, tag} -- confirm).
localparam META_W = TAG_W + 2
) (
input logic i_clk,
input logic i_rst,
// Incoming snoop request: address and command (snoop_cmd_e from the package).
input logic [31:0] i_snoop_addr,
input snoop_cmd_e i_snoop_cmd,
// Read port: one set index out, data and metadata for every way back in.
output logic [INDEX_W-1:0] o_read_index,
output logic o_read_valid,
input logic [DATA_W-1:0] i_read_data [NUM_WAYS],
input logic [META_W-1:0] i_read_meta [NUM_WAYS],
// Write port: o_write_valid carries one bit per way.
output logic [INDEX_W-1:0] o_write_index,
output logic [NUM_WAYS-1:0] o_write_valid,
output logic [DATA_W-1:0] o_write_data,
output logic [META_W-1:0] o_write_meta,
// Writeback request (data + address + valid) and its completion input.
output logic [DATA_W-1:0] o_writeback_data,
output logic [31:0] o_writeback_addr,
output logic o_writeback_valid,
input logic i_writeback_done
);
// TODO: snoop handling logic is not implemented yet; all outputs are undriven.
endmodule

View File

@@ -0,0 +1,366 @@
import application_wrapper_cache_pkg::*;
// Top level of the application-wrapper cache.
//
// Purely structural: instantiates the MMU, miss handler, data/meta arrays,
// LRU store, writeback buffer and bus interface and wires them together.
// The only logic in this module is the tag/index/offset slicing of the
// address and the o_cpu_rdy pass-through from the miss handler.
module application_wrapper_cache_top #(
// Cache geometry (overridable); forwarded to the miss handler, arrays and LRU.
parameter NUM_WAYS = 4,
parameter NUM_SETS = 64,
// Derived widths (localparams, not overridable).
// One cacheline: 64 bytes expressed in bits.
localparam DATA_W = 64*8,
// Byte offset within a cacheline (2**6 = 64 bytes).
localparam OFFSET_W = 6,
localparam INDEX_W = $clog2(NUM_SETS),
// Tag is whatever remains of a 32-bit address after index and offset.
localparam TAG_W = 32 - INDEX_W - OFFSET_W,
// Width of the per-set LRU word handed to u_lru.
localparam LRU_W = NUM_WAYS-1,
// Per-way metadata word: tag plus 2 extra bits (presumably the 2-bit mesi_e
// state, matching how the miss handler packs {mesi, tag} -- confirm).
localparam META_W = TAG_W + 2,
// Flit widths of the request, response, data and snoop channels below.
localparam REQ_W = 32,
localparam RSP_W = 32,
localparam DAT_W = 512+64,
localparam SNP_W = 32
) (
input logic i_clk,
input logic i_rst,
// CPU Interface
input logic [31:0] i_cpu_addr,
input logic i_cpu_we,
// NOTE(review): i_cpu_sync is not referenced anywhere in this module body.
input logic i_cpu_sync,
input logic [7:0] i_cpu_data,
output logic [7:0] o_cpu_data,
input logic i_cpu_rdy,
output logic o_cpu_rdy,
// CHI Interface
// All signals in this group are passed straight through to u_bus_interface.
output logic o_txsactive,
// NOTE(review): declared as an input but named with an o_ prefix, and wired
// to the bus interface port of the same name -- confirm direction and name.
input logic o_rxsactive,
output logic o_txlinkactivereq,
input logic i_txlinkactiveack,
// Transmit request channel.
output logic o_txreqflitpend,
output logic o_txreqflitv,
output logic [REQ_W-1:0] o_txreqflit,
input logic i_txreqlcrdv,
// Transmit response channel.
output logic o_txrspflitpend,
output logic o_txrspflitv,
output logic [RSP_W-1:0] o_txrspflit,
input logic i_txrsplcrdv,
// Transmit data channel.
output logic o_txdatflitpend,
output logic o_txdatflitv,
output logic [DAT_W-1:0] o_txdatflit,
input logic i_txdatlcrdv,
input logic i_rxlinkactivereq,
output logic o_rxlinkactiveack,
// Receive response channel.
input logic i_rxrspflitpend,
input logic i_rxrspflitv,
input logic [RSP_W-1:0] i_rxrspflit,
// NOTE(review): declared as an output but named with an i_ prefix (the other
// rx*lcrdv outputs use o_) -- confirm direction and name.
output logic i_rxrsplcrdv,
// Receive data channel.
input logic i_rxdatflitpend,
input logic i_rxdatflitv,
input logic [DAT_W-1:0] i_rxdatflit,
output logic o_rxdatlcrdv,
// Receive snoop channel.
input logic i_rxsnpflitpend,
input logic i_rxsnpflitv,
input logic [SNP_W-1:0] i_rxsnpflit,
output logic o_rxsnplcrdv
);
// MMU outputs. mmu_rdy feeds the miss handler's i_rdy; mmu_valid and
// mmu_page_table_entry are produced by the MMU but not consumed in this module.
// NOTE(review): in the MMU as changed alongside this file, o_rdy is tied to '1
// and o_mmu_valid is a registered copy of i_rdy -- confirm the intended roles.
logic mmu_rdy;
logic mmu_valid;
logic [31:0] mmu_phys_address;
page_table_entry_t mmu_page_table_entry;
application_wrapper_mmu #(
.TLB_COUNT (32),
.ADDR_WIDTH (32),
.LOG2_PAGE_SIZE (12)
) u_mmu (
.i_clk (i_clk),
.i_rst (i_rst),
.i_cpu_addr (i_cpu_addr),
// NOTE(review): this is an OR of the two ready signals; the miss handler
// qualifies its array reads with (i_rdy & o_rdy) -- confirm OR is intended.
.i_rdy (i_cpu_rdy | o_cpu_rdy),
.o_rdy (mmu_rdy),
.o_phys_address (mmu_phys_address),
.o_table_entry (mmu_page_table_entry),
.o_mmu_valid (mmu_valid)
);
// Address fields handed to the miss handler.
logic [TAG_W-1:0] cpu_tag;
logic [INDEX_W-1:0] cpu_index;
logic [OFFSET_W-1:0] cpu_offset;
logic miss_handler_rdy;
// CPU-side (miss handler) read/write ports into the cache arrays.
// Read returns data and metadata for every way; write valid is one bit per way.
logic [INDEX_W-1:0] cpu_read_index;
logic cpu_read_valid;
logic [DATA_W-1:0] cpu_read_data [NUM_WAYS];
logic [META_W-1:0] cpu_read_meta [NUM_WAYS];
logic [INDEX_W-1:0] cpu_write_index;
logic [NUM_WAYS-1:0] cpu_write_valid;
logic [DATA_W-1:0] cpu_write_data;
logic [META_W-1:0] cpu_write_meta;
// Snoop-side read/write ports into the cache arrays.
// NOTE(review): application_wrapper_cache_snooping is not instantiated in this
// module, so snoop_read_index/valid and snoop_write_* have no driver here.
logic [INDEX_W-1:0] snoop_read_index;
logic snoop_read_valid;
logic [DATA_W-1:0] snoop_read_data [NUM_WAYS];
logic [META_W-1:0] snoop_read_meta [NUM_WAYS];
logic [INDEX_W-1:0] snoop_write_index;
logic [NUM_WAYS-1:0] snoop_write_valid;
logic [DATA_W-1:0] snoop_write_data;
logic [META_W-1:0] snoop_write_meta;
// should the snoop unit also modify the LRU?
// I don't think so...
// LRU store ports; only the miss handler accesses the LRU.
logic [INDEX_W-1:0] cpu_lru_read_index;
logic cpu_lru_read_valid;
logic [LRU_W-1:0] cpu_lru_read_data;
logic [INDEX_W-1:0] cpu_lru_write_index;
logic cpu_lru_write_valid;
logic [LRU_W-1:0] cpu_lru_write_data;
// Writeback requests from the miss handler into the writeback buffer.
logic [DATA_W-1:0] cpu_writeback_data;
logic [31:0] cpu_writeback_addr;
logic cpu_writeback_valid;
logic cpu_writeback_done;
// Writeback requests from the snoop side into the writeback buffer.
// NOTE(review): snoop_writeback_data/addr/valid have no driver in this module.
logic [DATA_W-1:0] snoop_writeback_data;
logic [31:0] snoop_writeback_addr;
logic snoop_writeback_valid;
logic snoop_writeback_done;
// Writeback buffer -> bus interface.
logic [DATA_W-1:0] bus_writeback_data;
logic [31:0] bus_writeback_addr;
logic bus_writeback_valid;
logic bus_writeback_done;
// Miss handler <-> bus interface memory request/response.
logic [31:0] cpu_memory_addr;
logic cpu_memory_valid;
cache_cmd_e cpu_memory_cmd;
logic [DATA_W-1:0] cpu_memory_data;
logic cpu_memory_done;
cache_resp_e cpu_memory_resp;
// Snoop request decoded by the bus interface.
// NOTE(review): driven by u_bus_interface but not consumed in this module.
logic [31:0] snoop_addr;
snoop_cmd_e snoop_cmd;
logic snoop_valid;
// NOTE(review): the snoop_memory_* signals below are declared but not
// connected to anything in this module.
logic [31:0] snoop_memory_addr;
logic snoop_memory_valid;
cache_cmd_e snoop_memory_cmd;
logic [DATA_W-1:0] snoop_memory_data;
logic snoop_memory_done;
cache_resp_e snoop_memory_resp;
// there should be a bypass path here
// separate tag, index, offset from the physical address
// The tag is taken from the MMU's physical address, while index and offset are
// sliced directly from the incoming CPU address.
// NOTE(review): the MMU changed alongside this file registers o_phys_address,
// so the tag presumably arrives one clock after index/offset (the miss handler
// pairs i_cpu_tag with its *_d1 delayed index) -- confirm.
assign cpu_tag = mmu_phys_address[31:INDEX_W+OFFSET_W];
assign cpu_index = i_cpu_addr[INDEX_W+OFFSET_W-1:OFFSET_W];
assign cpu_offset = i_cpu_addr[OFFSET_W-1:0];
// The CPU-facing ready is exactly the miss handler's ready.
assign o_cpu_rdy = miss_handler_rdy;
// Miss handler: serves CPU accesses using the arrays and LRU, and issues
// writeback and memory requests.
application_wrapper_cache_miss_handler #(
.NUM_WAYS (NUM_WAYS),
.NUM_SETS (NUM_SETS)
) u_miss_handler (
.i_clk (i_clk),
.i_rst (i_rst),
.i_cpu_tag (cpu_tag),
.i_cpu_index (cpu_index),
.i_cpu_offset (cpu_offset),
.i_rdy (mmu_rdy),
.o_rdy (miss_handler_rdy),
.i_cpu_we (i_cpu_we),
.i_cpu_data (i_cpu_data),
.o_cpu_data (o_cpu_data),
.o_read_index (cpu_read_index),
.o_read_valid (cpu_read_valid),
.i_read_data (cpu_read_data),
.i_read_meta (cpu_read_meta),
.o_write_index (cpu_write_index),
.o_write_valid (cpu_write_valid),
.o_write_data (cpu_write_data),
.o_write_meta (cpu_write_meta),
.o_lru_read_index (cpu_lru_read_index),
.o_lru_read_valid (cpu_lru_read_valid),
.i_lru_read_data (cpu_lru_read_data),
.o_lru_write_index (cpu_lru_write_index),
.o_lru_write_valid (cpu_lru_write_valid),
.o_lru_write_data (cpu_lru_write_data),
.o_writeback_data (cpu_writeback_data),
.o_writeback_addr (cpu_writeback_addr),
.o_writeback_valid (cpu_writeback_valid),
.i_writeback_done (cpu_writeback_done),
.o_memory_addr (cpu_memory_addr),
.o_memory_valid (cpu_memory_valid),
.o_memory_cmd (cpu_memory_cmd),
.i_memory_data (cpu_memory_data),
.i_memory_done (cpu_memory_done),
.i_memory_resp (cpu_memory_resp)
);
// Data and metadata arrays with separate CPU-side and snoop-side ports.
// Note that no reset is connected to this instance.
application_wrapper_cache_arrays #(
.NUM_WAYS (NUM_WAYS),
.NUM_SETS (NUM_SETS)
) u_cache_arrays (
.i_clk (i_clk),
.i_cpu_read_index (cpu_read_index),
.i_cpu_read_valid (cpu_read_valid),
.o_cpu_read_data (cpu_read_data),
.o_cpu_read_meta (cpu_read_meta),
.i_cpu_write_index (cpu_write_index),
.i_cpu_write_valid (cpu_write_valid),
.i_cpu_write_data (cpu_write_data),
.i_cpu_write_meta (cpu_write_meta),
.i_snoop_read_index (snoop_read_index),
.i_snoop_read_valid (snoop_read_valid),
.o_snoop_read_data (snoop_read_data),
.o_snoop_read_meta (snoop_read_meta),
.i_snoop_write_index (snoop_write_index),
.i_snoop_write_valid (snoop_write_valid),
.i_snoop_write_data (snoop_write_data),
.i_snoop_write_meta (snoop_write_meta)
);
// Per-set LRU state store, accessed only by the miss handler.
application_wrapper_cache_lru #(
.LRU_W (LRU_W),
.NUM_SETS (NUM_SETS)
) u_lru (
.i_clk (i_clk),
.i_read_index (cpu_lru_read_index),
.i_read_valid (cpu_lru_read_valid),
.o_read_data (cpu_lru_read_data),
.i_write_index (cpu_lru_write_index),
.i_write_valid (cpu_lru_write_valid),
.i_write_data (cpu_lru_write_data)
);
// Writeback buffer: takes writeback requests from the CPU side and the snoop
// side and presents a single writeback stream to the bus interface.
application_wrapper_cache_writeback_buffer #(
.DATA_W (DATA_W)
) u_writeback_buffer (
.i_clk (i_clk),
.i_rst (i_rst),
.i_cpu_writeback_data (cpu_writeback_data),
.i_cpu_writeback_addr (cpu_writeback_addr),
.i_cpu_writeback_valid (cpu_writeback_valid),
.o_cpu_writeback_done (cpu_writeback_done),
.i_snoop_writeback_data (snoop_writeback_data),
.i_snoop_writeback_addr (snoop_writeback_addr),
.i_snoop_writeback_valid (snoop_writeback_valid),
.o_snoop_writeback_done (snoop_writeback_done),
.o_bus_writeback_data (bus_writeback_data),
.o_bus_writeback_addr (bus_writeback_addr),
.o_bus_writeback_valid (bus_writeback_valid),
.i_bus_writeback_done (bus_writeback_done)
);
// Bus interface: connects the miss handler's memory requests, the writeback
// stream and the decoded snoop request to the external flit channels.
application_wrapper_cache_bus_interface #(
.DATA_W (DATA_W)
) u_bus_interface (
.i_clk (i_clk),
.i_rst (i_rst),
.i_cpu_memory_addr (cpu_memory_addr),
.i_cpu_memory_valid (cpu_memory_valid),
.i_cpu_memory_cmd (cpu_memory_cmd),
.o_cpu_memory_data (cpu_memory_data),
.o_cpu_memory_done (cpu_memory_done),
.o_cpu_memory_resp (cpu_memory_resp),
.o_snoop_addr (snoop_addr),
.o_snoop_cmd (snoop_cmd),
.o_snoop_valid (snoop_valid),
.i_writeback_addr (bus_writeback_addr),
.i_writeback_data (bus_writeback_data),
.i_writeback_valid (bus_writeback_valid),
.o_writeback_done (bus_writeback_done),
.o_txsactive (o_txsactive),
.o_rxsactive (o_rxsactive),
.o_txlinkactivereq (o_txlinkactivereq),
.i_txlinkactiveack (i_txlinkactiveack),
.o_txreqflitpend (o_txreqflitpend),
.o_txreqflitv (o_txreqflitv),
.o_txreqflit (o_txreqflit),
.i_txreqlcrdv (i_txreqlcrdv),
.o_txrspflitpend (o_txrspflitpend),
.o_txrspflitv (o_txrspflitv),
.o_txrspflit (o_txrspflit),
.i_txrsplcrdv (i_txrsplcrdv),
.o_txdatflitpend (o_txdatflitpend),
.o_txdatflitv (o_txdatflitv),
.o_txdatflit (o_txdatflit),
.i_txdatlcrdv (i_txdatlcrdv),
.i_rxlinkactivereq (i_rxlinkactivereq),
.o_rxlinkactiveack (o_rxlinkactiveack),
.i_rxrspflitpend (i_rxrspflitpend),
.i_rxrspflitv (i_rxrspflitv),
.i_rxrspflit (i_rxrspflit),
.i_rxrsplcrdv (i_rxrsplcrdv),
.i_rxdatflitpend (i_rxdatflitpend),
.i_rxdatflitv (i_rxdatflitv),
.i_rxdatflit (i_rxdatflit),
.o_rxdatlcrdv (o_rxdatlcrdv),
.i_rxsnpflitpend (i_rxsnpflitpend),
.i_rxsnpflitv (i_rxsnpflitv),
.i_rxsnpflit (i_rxsnpflit),
.o_rxsnplcrdv (o_rxsnplcrdv)
);
endmodule

View File

@@ -0,0 +1,23 @@
// Writeback buffer: accepts cacheline writeback requests from two sources
// (CPU side and snoop side) and exposes one writeback stream toward the bus.
//
// Currently a stub: only the port list is declared and the module body is
// empty, so none of the outputs below has a driver yet.
// NOTE(review): the miss handler's WAIT_WRITEBACK_ACK state waits on
// (i_writeback_done || i_memory_done); with o_cpu_writeback_done undriven the
// writeback path cannot complete through this signal -- confirm before use.
module application_wrapper_cache_writeback_buffer #(
// Width of one writeback data word in bits (default: 64 bytes).
parameter DATA_W = 64*8
) (
input logic i_clk,
input logic i_rst,
// CPU-side writeback request (data + 32-bit address + valid) and its done flag.
input logic [DATA_W-1:0] i_cpu_writeback_data,
input logic [31:0] i_cpu_writeback_addr,
input logic i_cpu_writeback_valid,
output logic o_cpu_writeback_done,
// Snoop-side writeback request and its done flag.
input logic [DATA_W-1:0] i_snoop_writeback_data,
input logic [31:0] i_snoop_writeback_addr,
input logic i_snoop_writeback_valid,
output logic o_snoop_writeback_done,
// Single writeback stream toward the bus and its completion input.
output logic [DATA_W-1:0] o_bus_writeback_data,
output logic [31:0] o_bus_writeback_addr,
output logic o_bus_writeback_valid,
input logic i_bus_writeback_done
);
// TODO: buffering/arbitration logic is not implemented yet; all outputs are
// undriven.
endmodule

View File

@@ -9,13 +9,27 @@ module application_wrapper_mmu #(
input logic i_rst, input logic i_rst,
input logic [ADDR_WIDTH-1:0] i_cpu_addr, input logic [ADDR_WIDTH-1:0] i_cpu_addr,
input i_we, input logic i_rdy,
input i_rdy, output logic o_rdy,
input o_rdy,
output logic [ADDR_WIDTH-1:0] o_phys_address, output logic [ADDR_WIDTH-1:0] o_phys_address,
output page_table_entry_t o_table_entry, output page_table_entry_t o_table_entry,
output logic o_mmu_valid output logic o_mmu_valid
); );
assign o_rdy = '1;
always @(posedge i_clk) begin
o_mmu_valid <= i_rdy;
o_phys_address <= i_cpu_addr;
o_table_entry.cache_disable <= '0;
o_table_entry.read_eanble <= '1;
o_table_entry.write_enable <= '1;
o_table_entry.execute_enable <= '1;
o_table_entry.supervisor <= '1;
o_table_entry.present <= '1;
o_table_entry.write_through <= '0;
end
endmodule endmodule

View File

@@ -0,0 +1,52 @@
# Top level requirements
1. Parameterizable cache size (default 64)
2. Non-parameterizable cacheline width (static 64)
3. Single cycle access required for use with 6502.
4. Support MESI cache coherency protocol (Illinois)
5. Direct mapped cache for low latency
6. Interface with other coherent components
## Single Cycle Access
For reads, the 6502 presents an address on the address bus and expects data on
the din bus on the very next cycle. Because the 6502 has so few registers, it
is critical for memory to have as little latency as possible. For this reason,
the cache is also direct mapped. Future versions could make this associative,
but not right now.
## MESI Cache Coherency
Cachelines must support the Illinois protocol. Compatibility with CHI is shown
below:
MODIFIED: when this line is evicted, we write the cacheline to main memory
EXCLUSIVE: We can write to this cacheline without informing the other caches,
since we are the only ones with this cacheline
SHARED: If we write to this cacheline, we must notify the coherency controller
so that it can invalidate all other copies of the cacheline
INVALID: If we read from a cacheline, send out a request to the coherency
controller to see if anybody else has a copy. If they do, then read the
data from the other cache. Otherwise, read from main memory.
If we write to this cacheline, send a request to the coherency controller
to see if anybody else has this cacheline. If they have it, then read the
data from the cache which has it (the first one if multiple do) and invalidate
the entries in all caches which have it.
## Interface
The interface is based off of the CHI interface, with 4 separate physical
interfaces: REQ, RSP, DAT, SNP.
The CPU cache will send out commands on the REQ interface. The coherency
controller will also send out commands on the REQ interface. The response
channel contains the response to the request, not the data. Data comes on
the DAT interface. SNP requests are sent on the SNP interface, and responses
and data come back on the same interfaces. Like CHI, if the response has
data, then the response and data both come back on the DAT interface; if the
response has no data, then it just comes back on the RSP interface.
SNP requests only come from the coherency controller.

View File

@@ -1,6 +1,11 @@
cache/application_wrapper_cache_pkg.sv cache/application_wrapper_cache_pkg.sv
cache/application_wrapper_cache_arrays.sv cache/application_wrapper_cache_arrays.sv
cache/application_wrapper_mmu.sv cache/application_wrapper_cache_bus_interface.sv
cache/application_wrapper_cache_lru.sv
cache/application_wrapper_cache_miss_handler.sv
cache/application_wrapper_cache_snooping.sv
cache/application_wrapper_cache_top.sv cache/application_wrapper_cache_top.sv
cache/application_wrapper_cache_writeback_buffer.sv
cache/application_wrapper_mmu.sv
application_wrapper_top.sv application_wrapper_top.sv