Compare commits
6 Commits
df25550c8a
...
blathi/soc
| Author | SHA1 | Date | |
|---|---|---|---|
| cdbb6a9720 | |||
| 6c6c3d295b | |||
| a21cc4241a | |||
| 151643b2ad | |||
| 61ee654b18 | |||
| aa8c4a64df |
@@ -1,493 +0,0 @@
|
||||
import cocotb
|
||||
from cocotb.handle import Immediate, LogicArray
|
||||
|
||||
from cocotb.simulator import get_sim_time
|
||||
|
||||
from cocotb.clock import Clock
|
||||
from cocotb.triggers import Timer, RisingEdge, FallingEdge, with_timeout
|
||||
|
||||
from enum import IntEnum
|
||||
|
||||
from collections import defaultdict
|
||||
from collections.abc import Mapping
|
||||
|
||||
import logging
|
||||
|
||||
import random
|
||||
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
CLK_PERIOD = 5
|
||||
|
||||
reference_cache_data = defaultdict(bytearray)
|
||||
|
||||
higher_cache_data = defaultdict(bytearray)
|
||||
|
||||
async def cpu_sequencer(dut, sequence: list[tuple[int, int, bool, bool]]):
    """Drive a list of CPU requests into the DUT.

    Args:
        dut: cocotb handle exposing ``i_addr``, ``i_data``, ``i_we``,
            ``i_sync``, ``i_rst``, ``i_clk`` and ``o_rdy``.
        sequence: non-empty, ordered list of ``(addr, data, we, sync)``
            requests. It is indexed positionally, so it must be a sequence
            and not a mapping.

    The first request is applied before reset is released. After the falling
    edge of ``i_rst`` one further request is applied on every rising clock
    edge at which ``o_rdy`` is high. Once the last request has been driven
    the coroutine waits another 150 ns before returning.
    """

    def drive(request):
        # Apply one request to the CPU-side inputs.
        addr, do, we, sync = request
        dut.i_addr.value = addr
        dut.i_data.value = do
        dut.i_we.value = we
        dut.i_sync.value = sync

    drive(sequence[0])

    await FallingEdge(dut.i_rst)

    index = 1
    while index < len(sequence):
        await RisingEdge(dut.i_clk)
        if not dut.o_rdy.value:
            # DUT is busy: hold the current request for another cycle.
            continue

        drive(sequence[index])
        index += 1

    # Extra simulation time after the final request has been applied.
    await Timer(150, "ns")
|
||||
|
||||
async def cpu_data_monitor(dut):
    """Compare DUT read data against ``reference_cache_data``.

    On every rising clock edge with ``o_rdy`` high, the request sampled on the
    previous accepted edge is processed:

    * a write stores its data byte into the reference line;
    * a read compares ``o_data`` with the reference byte and logs an error
      on mismatch.

    Requests whose address is 0 are ignored. The reference line is selected
    with ``(addr // 64) % 64`` and the byte with ``addr % 64``.
    """
    address = 0
    we = 0
    i_data = 0

    await FallingEdge(dut.i_rst)

    while True:
        await RisingEdge(dut.i_clk)
        if not dut.o_rdy.value:
            continue

        # Shift the request history by one accepted cycle.
        previous_address, address = address, int(dut.i_addr.value)
        previous_we, we = we, int(dut.i_we.value)
        previous_i_data, i_data = i_data, int(dut.i_data.value)

        data = int(dut.o_data.value)

        if previous_address == 0:
            continue

        index = (previous_address // 64) % 64
        offset = previous_address % 64
        cacheline = reference_cache_data[index]

        if previous_we:
            # Writes are not checked; they only update the reference model.
            cacheline[offset] = previous_i_data
            logger.debug(f"We saw a write here {index=} {offset=} previous_data={previous_i_data:x}")
            continue

        expected_data = cacheline[offset]
        if (data != expected_data):
            logger.error(f"{get_sim_time()} {address=:x} {previous_address=:x} {data=:x} {expected_data=:x}")
|
||||
|
||||
|
||||
|
||||
async def mmu_sequencer(dut):
    """Copy ``i_addr`` onto ``i_phys_address`` on every rising clock edge."""
    while True:
        await RisingEdge(dut.i_clk)
        sampled_address = dut.i_addr.value
        dut.i_phys_address.value = sampled_address
|
||||
|
||||
async def handle_higher_level_cache(dut):
    """Model the next cache level behind the DUT.

    Whenever ``o_cache_valid`` is seen high on a rising clock edge:

    * CACHE_READ: return the 64-byte line stored for ``o_cache_addr``
      (random bytes the first time an address is seen), pulse
      ``i_cache_rdy`` for one cycle and install the same line object in
      ``reference_cache_data`` at ``dut.read_index``.
    * CACHE_WRITE: store ``o_cache_data`` for that address and pulse
      ``i_cache_rdy`` for one cycle.

    Any other command is ignored.
    """

    class CacheCmd(IntEnum):
        CACHE_NONE = 0
        CACHE_READ = 1
        CACHE_WRITE = 2

    dut.i_cache_rdy.value = 0

    while True:
        await RisingEdge(dut.i_clk)
        dut.i_cache_rdy.value = 0

        if not dut.o_cache_valid.value:
            continue

        cmd = CacheCmd(dut.o_cache_cmd.value)
        addr = int(dut.o_cache_addr.value)

        logger.debug(f"{cmd=} {addr=}")

        if cmd == CacheCmd.CACHE_READ:
            if addr not in higher_cache_data:
                higher_cache_data[addr] = bytearray(random.randbytes(64))
            line = higher_cache_data[addr]

            dut.i_cache_data.value = LogicArray.from_bytes(line, byteorder="little")
            dut.i_cache_rdy.value = 1

            # NOTE(review): the reference model shares this bytearray with
            # higher_cache_data, so monitor writes mutate both - confirm
            # that aliasing is intended.
            reference_cache_data[int(dut.read_index.value)] = line
        elif cmd == CacheCmd.CACHE_WRITE:
            dut.i_cache_rdy.value = 1

            data = dut.o_cache_data.value.to_bytes(byteorder="little")
            higher_cache_data[addr] = bytearray(data)
        else:
            continue

        # Hold ready for exactly one clock, then drop it again.
        await RisingEdge(dut.i_clk)
        dut.i_cache_rdy.value = 0
|
||||
|
||||
@cocotb.test
async def sanity_test(dut):
    """Mix writes and reads over lines 0x100 and 0x200, then check counters.

    Expects two cache misses and no evictions. Counter mismatches are
    logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    def wr(addr, value):
        # (addr, data, we, sync) tuple for a write
        return (addr, value, True, False)

    def rd(addr):
        # (addr, data, we, sync) tuple for a read
        return (addr, 0x00, False, False)

    cpu_sequence = [
        wr(0x100, 0xaa), wr(0x101, 0xbb),
        rd(0x100), rd(0x101),
        wr(0x200, 0xcc), wr(0x201, 0xdd),
        rd(0x100), rd(0x101), rd(0x200), rd(0x201),
        wr(0x100, 0x11), wr(0x101, 0x22),
        rd(0x100), wr(0x200, 0x33),
        rd(0x101), wr(0x201, 0x44),
        rd(0x100), rd(0x200), rd(0x101), rd(0x201),
    ]

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = 2
    expected_evictions = 0

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
|
||||
@cocotb.test
async def clean_evict_test(dut):
    """Read 0x100, then read 0x1100, without writing either.

    The original author's intent: the second read aliases the first line,
    which should be overwritten without an eviction. Expects two misses and
    zero evictions. Mismatches are logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    cpu_sequence = [(addr, 0x00, False, False) for addr in (0x100, 0x1100)]

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = 2
    expected_evictions = 0

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
@cocotb.test
async def dirty_evict_test(dut):
    """Write two bytes at 0x100/0x101, touch 0x1100, then re-read 0x100.

    The sequence writes the first line, reads and then writes 0x1100, and
    finally reads 0x100 again. Expects three misses and two evictions.
    Mismatches are logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    write_requests = [(0x100, 0x41, True, False), (0x101, 0x42, True, False)]
    alias_requests = [(0x1100, 0x00, False, False), (0x1100, 0xaa, True, False)]
    readback_requests = [(0x100, 0x00, False, False)]
    cpu_sequence = write_requests + alias_requests + readback_requests

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = 3
    expected_evictions = 2

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
|
||||
@cocotb.test
async def long_write_thrash_test(dut):
    """Write one byte to each of 2**20 // 64 consecutive 64-byte lines.

    Expects one miss per line, and one eviction per line except for the
    last 64 lines. Mismatches are logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    num_lines_read = 2**20//64

    cpu_sequence = []
    for i in range(num_lines_read):
        cpu_sequence.append((i*64, i % 256, True, False))

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    # The last 64 lines aren't evicted
    expected_cache_misses = num_lines_read
    expected_evictions = num_lines_read - 64

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
|
||||
@cocotb.test
async def long_write_read_thrash_test(dut):
    """Write, then read back, one byte in each of 2**20 // 64 lines.

    Expects two misses per line and one eviction per line. Mismatches are
    logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    num_lines_read = 2**20//64

    # Write pass followed by a read pass over the same addresses.
    write_pass = [(i*64, i % 256, True, False) for i in range(num_lines_read)]
    read_pass = [(i*64, 0, False, False) for i in range(num_lines_read)]
    cpu_sequence = write_pass + read_pass

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = num_lines_read * 2
    expected_evictions = num_lines_read

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
|
||||
@cocotb.test
async def long_write_linear_test(dut):
    """Write every byte address from 0 up to 2**16 - 1 in order.

    Expects one miss per 64-byte line, and 64 fewer evictions than misses.
    Mismatches are logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    num_bytes_read = 2**16

    cpu_sequence = []
    for i in range(num_bytes_read):
        cpu_sequence.append((i, i % 256, True, False))

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = num_bytes_read // 64
    expected_evictions = num_bytes_read//64 - 64

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
|
||||
@cocotb.test
async def long_write_read_linear_test(dut):
    """Write, then read back, every byte address from 0 up to 2**16 - 1.

    Expects two misses per 64-byte line and one eviction per line.
    Mismatches are logged as errors.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    num_bytes_read = 2**16

    # Write pass followed by a read pass over the same addresses.
    write_pass = [(i, i % 256, True, False) for i in range(num_bytes_read)]
    read_pass = [(i, 0, False, False) for i in range(num_bytes_read)]
    cpu_sequence = write_pass + read_pass

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = (num_bytes_read // 64) * 2
    expected_evictions = num_bytes_read // 64

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
@cocotb.test
async def short_write_read_linear_test(dut):
    """Write, then read back, 64 * 64 consecutive byte addresses.

    "Short" means the footprint is only 64 cachelines, so no evictions
    should be needed. Expects 64 misses and zero evictions. Mismatches are
    logged as errors.
    """
    # TODO add number of evictions and cachelines loaded as performance counters
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for background in (mmu_sequencer, handle_higher_level_cache, cpu_data_monitor):
        cocotb.start_soon(background(dut))

    num_bytes_read = 64*64  # 64 bytes times 64 cachelines

    # Same (addr, data) pairs for both passes; only the write enable differs.
    write_pass = [(i, i % 256, True, False) for i in range(num_bytes_read)]
    read_pass = [(i, i % 256, False, False) for i in range(num_bytes_read)]
    cpu_sequence = write_pass + read_pass

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)

    expected_cache_misses = num_bytes_read//64
    expected_evictions = num_bytes_read//64 - 64

    dut_evictions = int(dut.eviction_count.value)
    dut_misses = int(dut.cache_miss_count.value)

    if dut_evictions != expected_evictions:
        logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}")

    if dut_misses != expected_cache_misses:
        logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}")
|
||||
|
||||
@cocotb.test
async def random_access_test(dut):
    """Drive 2**18 fully random requests through the cache.

    Address, data byte, write enable and sync flag are all drawn at random,
    so this doubles as an unrealistic thrash test. No counter expectations
    are checked; data checking is left to ``cpu_data_monitor``.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    cocotb.start_soon(mmu_sequencer(dut))
    cocotb.start_soon(handle_higher_level_cache(dut))
    cocotb.start_soon(cpu_data_monitor(dut))

    num_bytes_read = 2**18

    # random.randint is inclusive on both ends, so the upper bound must be
    # 2**32 - 1 to stay inside a 32-bit address.
    # NOTE(review): assumes i_addr is 32 bits wide - confirm against the DUT.
    cpu_sequence = [
        (random.randint(0, 2**32 - 1), random.randint(0, 255), random.randint(0, 1), random.randint(0, 1))
        for _ in range(num_bytes_read)]

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await cpu_sequencer(dut, cpu_sequence)
|
||||
402
sim/application_wrapper/cache/application_wrapper_cache_miss_handler_test.py
vendored
Normal file
402
sim/application_wrapper/cache/application_wrapper_cache_miss_handler_test.py
vendored
Normal file
@@ -0,0 +1,402 @@
|
||||
import cocotb
|
||||
from cocotb.handle import LogicArray, Array, Immediate
|
||||
|
||||
from cocotb.clock import Clock
|
||||
from cocotb.triggers import ReadOnly, NextTimeStep, RisingEdge, Timer
|
||||
|
||||
import logging
|
||||
|
||||
import random
|
||||
|
||||
from enum import IntEnum
|
||||
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
CLK_PERIOD = 5
|
||||
|
||||
|
||||
SETS = 64
|
||||
WAYS = 4
|
||||
|
||||
TAG_WIDTH = 20
|
||||
|
||||
data_arrays = [{}, {}, {}, {}]
|
||||
meta_arrays = [{}, {}, {}, {}]
|
||||
|
||||
lru_array = {}
|
||||
|
||||
class MesiState(IntEnum):
    """Coherence state values as packed into the upper bits of a meta word."""

    # Plain ints: the original trailing commas made these 1-tuples, which
    # IntEnum only happened to unpack back into the same integer values.
    MESI_INVALID = 0
    MESI_SHARED = 1
    MESI_EXCLUSIVE = 2
    MESI_MODIFIED = 3
|
||||
|
||||
def write_cacheline(index: int, way: int, data: bytes, mesi_state: MesiState, tag: int):
    """Preload one line of the modelled data and meta arrays.

    Args:
        index: set index used as the key within the selected way.
        way: position in ``data_arrays`` / ``meta_arrays``.
        data: line contents stored in the data array.
        mesi_state: state placed above the tag bits of the meta word.
        tag: tag placed in the low ``TAG_WIDTH`` bits of the meta word. It
            is not masked, so callers must keep it below ``2**TAG_WIDTH``.
    """
    data_arrays[way][index] = data
    # Meta word layout: {mesi_state, tag}; use the shared TAG_WIDTH constant
    # rather than repeating the literal width.
    meta_arrays[way][index] = (mesi_state << TAG_WIDTH) | tag
|
||||
|
||||
async def handle_cache_arrays(dut):
    """Model the per-way data and meta arrays behind the DUT.

    On every rising clock edge:

    * if ``o_write_valid`` is non-zero, it is decoded as a per-way write
      enable mask and ``o_write_data`` / ``o_write_meta`` are stored at
      ``o_write_index`` in every enabled way;
    * if ``o_read_valid`` is set, the data and meta of all ways at
      ``o_read_index`` are driven onto ``i_read_data`` / ``i_read_meta``.

    Writes are applied before reads, so a same-cycle read of the written
    index returns the new contents. Reading an index that was never written
    raises ``KeyError``.
    """
    while True:
        await RisingEdge(dut.i_clk)

        write_valid = dut.o_write_valid.value
        if write_valid:
            index = int(dut.o_write_index.value)
            # One enable bit per way; sized by WAYS instead of a literal 4.
            write_enables = [bool(int(write_valid) & (1 << i)) for i in range(WAYS)]
            write_data = dut.o_write_data.value.to_bytes(byteorder="little")
            write_meta = int(dut.o_write_meta.value)
            logger.debug(f"Write Valid: {index=} {write_enables=} {write_data=} {write_meta=:#x}")

            for data_array, meta_array, write_enable in zip(data_arrays, meta_arrays, write_enables):
                if write_enable:
                    data_array[index] = write_data
                    meta_array[index] = write_meta

        if dut.o_read_valid.value:
            index = int(dut.o_read_index.value)
            logger.debug(f"Read Valid: {index=}")

            read_data = [LogicArray.from_bytes(data[index], byteorder="little") for data in data_arrays]
            read_meta = [meta[index] for meta in meta_arrays]

            dut.i_read_data.value = read_data
            dut.i_read_meta.value = read_meta
|
||||
|
||||
async def handle_lru_arrays(dut):
    """Model the LRU state array behind the DUT.

    On every rising clock edge a valid write stores ``o_lru_write_data`` at
    ``o_lru_write_index``, and then a valid read drives the stored entry for
    ``o_lru_read_index`` onto ``i_lru_read_data``. Reading an index that was
    never written raises ``KeyError``.
    """
    while True:
        await RisingEdge(dut.i_clk)

        if dut.o_lru_write_valid.value:
            logger.debug("lru write")
            slot = int(dut.o_lru_write_index.value)
            lru_array[slot] = int(dut.o_lru_write_data.value)

        if dut.o_lru_read_valid.value:
            logger.debug("lru read")
            slot = int(dut.o_lru_read_index.value)
            dut.i_lru_read_data.value = lru_array[slot]
|
||||
|
||||
|
||||
|
||||
async def handle_writeback(dut):
    """Acknowledge writeback requests from the DUT.

    When ``o_writeback_valid`` is seen high on a rising clock edge, wait two
    more rising edges, assert ``i_writeback_done`` for one clock, then
    deassert it.
    """
    dut.i_writeback_done.value = 0

    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_writeback_valid.value:
            logger.info("Writeback valid")

            # Two clocks of latency before the done pulse.
            for _ in range(2):
                await RisingEdge(dut.i_clk)

            dut.i_writeback_done.value = 1
            await RisingEdge(dut.i_clk)
            dut.i_writeback_done.value = 0
|
||||
|
||||
async def handle_bus_interface(dut):
    """Answer memory requests from the DUT.

    When ``o_memory_valid`` is seen high on a rising clock edge, wait two
    more rising edges, then drive ``i_memory_done`` = 1 and
    ``i_memory_resp`` = 2 for one clock before clearing both.
    """
    dut.i_memory_done.value = 0
    dut.i_memory_resp.value = 0

    while True:
        await RisingEdge(dut.i_clk)
        if dut.o_memory_valid.value:
            logger.debug("Bus Interface Access")

            # Two clocks of latency before the response.
            for _ in range(2):
                await RisingEdge(dut.i_clk)

            dut.i_memory_done.value = 1
            dut.i_memory_resp.value = 2
            await RisingEdge(dut.i_clk)
            dut.i_memory_done.value = 0
            dut.i_memory_resp.value = 0
|
||||
|
||||
@cocotb.test
async def test_sanity(dut):
    """Issue one read per clock to set indices 0..31 with tag 0.

    Every way of every set is preloaded as an EXCLUSIVE line with tag 0 and
    all-zero data before the reads are driven. Only the cache-array model is
    started for this test.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    cocotb.start_soon(handle_cache_arrays(dut))

    dut.i_cpu_we.value = 0

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    for _ in range(10):
        await RisingEdge(dut.i_clk)
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    for way in range(WAYS):
        for index in range(SETS):
            write_cacheline(index, way, bytes([0] * 64), MesiState.MESI_EXCLUSIVE, 0)

    for i in range(32):
        # Wait for the DUT instead of skipping the request: a bare
        # `continue` here consumed no simulation time, so every remaining
        # index was silently dropped once o_rdy went low.
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

        dut.i_cpu_tag.value = 0
        dut.i_cpu_index.value = i
        dut.i_cpu_offset.value = 0

        dut.i_rdy.value = 1

        dut.i_cpu_we.value = 0

        await RisingEdge(dut.i_clk)
|
||||
|
||||
|
||||
@cocotb.test
async def test_clean_eviction(dut):
    """Read tag 0xaa from a set whose ways all hold SHARED lines.

    Set 2 is preloaded with tags 1..WAYS, each in MESI_SHARED state and
    filled with 0xaa bytes. A read to that set is then issued with tag 0xaa
    presented one clock after index/offset. The test runs for a further
    1 us and makes no explicit checks.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    INDEX = 2

    # Fill every way of the set; way N gets tag N + 1.
    for way in range(WAYS):
        write_cacheline(INDEX, way, bytes([0xaa] * 64), MesiState.MESI_SHARED, way+1)

    # Present index/offset first, with a placeholder tag.
    dut.i_cpu_tag.value = 0x0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 0

    await RisingEdge(dut.i_clk)

    # read with tag 0xaa
    dut.i_cpu_tag.value = 0xaa

    await RisingEdge(dut.i_clk)

    dut.i_cpu_tag.value = 0

    await Timer(1, "us")
|
||||
|
||||
@cocotb.test
async def test_eviction(dut):
    """Read tag 0xaa from a set whose ways all hold MODIFIED lines.

    Set 2 is preloaded with tags 1..WAYS, each in MESI_MODIFIED state and
    filled with 0xaa bytes. A read to that set is then issued with tag 0xaa
    presented one clock after index/offset. The test runs for a further
    1 us and makes no explicit checks.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    INDEX = 2

    # Fill every way of the set; way N gets tag N + 1.
    for way in range(WAYS):
        write_cacheline(INDEX, way, bytes([0xaa] * 64), MesiState.MESI_MODIFIED, way+1)

    # Present index/offset first, with a placeholder tag.
    dut.i_cpu_tag.value = 0x0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 0

    await RisingEdge(dut.i_clk)

    # read with tag 0xaa
    dut.i_cpu_tag.value = 0xaa

    await RisingEdge(dut.i_clk)

    dut.i_cpu_tag.value = 0

    await Timer(1, "us")
|
||||
|
||||
@cocotb.test
async def test_request_ownership(dut):
    """Write to a line that is preloaded in MESI_SHARED state.

    Set 2 is preloaded with tags 1..WAYS, all SHARED and filled with 0xaa
    bytes. A write of 0xaa at offset 2 is issued, with tag 2 presented one
    clock after index/offset. The test then runs for 1 us and makes no
    explicit checks.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    INDEX = 2

    # Write with tag way + 1
    for way in range(WAYS):
        write_cacheline(INDEX, way, bytes([0xaa] * 64), MesiState.MESI_SHARED, way+1)

    # Present index/offset/data first, with a placeholder tag.
    dut.i_cpu_tag.value = 0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_cpu_data.value = 0xaa
    dut.i_rdy.value = 1
    dut.i_cpu_we.value = 1

    await RisingEdge(dut.i_clk)

    # write with tag 0x2
    dut.i_cpu_data.value = 0
    dut.i_cpu_tag.value = 2

    await RisingEdge(dut.i_clk)

    dut.i_cpu_tag.value = 0

    await Timer(1, "us")
|
||||
|
||||
@cocotb.test
async def test_way_read_thrash(dut):
    """Read 32 different tags from set 0, one after another.

    Each request is driven for one clock and the test then waits for
    ``o_rdy`` before issuing the next tag. The test runs for a further 1 us
    and makes no explicit checks.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    dut.i_cpu_we.value = 0

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    for tag in range(32):
        dut.i_cpu_tag.value = tag
        dut.i_cpu_index.value = 0
        dut.i_cpu_offset.value = 0
        dut.i_rdy.value = 1

        # Always spend at least one clock, then stall until ready.
        await RisingEdge(dut.i_clk)
        while True:
            if dut.o_rdy.value:
                break
            await RisingEdge(dut.i_clk)

    await Timer(1, "us")
|
||||
|
||||
|
||||
@cocotb.test
async def test_write_waw(dut):
    """Issue two back-to-back writes to the same line (write after write).

    Both writes target set 7 with tag 0xabcd: 0xaa at offset 1, then 0x55
    at offset 2. The tag is presented one clock after index/offset, and the
    test waits for ``o_rdy`` after each write. It makes no explicit checks.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    async def wait_until_ready():
        # Stall on rising clock edges until the DUT reports ready.
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    INDEX = 7
    TAG = 0xabcd

    # unused tag
    dut.i_cpu_tag.value = 0xffff
    dut.i_rdy.value = 1

    # First write: 0xaa at offset 1.
    dut.i_cpu_we.value = 1
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 1
    dut.i_cpu_data.value = 0xaa
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = TAG

    await wait_until_ready()

    # Second write: 0x55 at offset 2 of the same line.
    dut.i_cpu_we.value = 1
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 2
    dut.i_cpu_data.value = 0x55
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = TAG

    await wait_until_ready()

    dut.i_cpu_we.value = 0

    await Timer(1, "us")
|
||||
|
||||
@cocotb.test
async def test_write_raw(dut):
    """Write a byte, then read the same byte back (read after write).

    Set 7, tag 0xabcd, offset 1: 0x41 is written, then the same location is
    read. The tag is presented one clock after index/offset, and the test
    waits for ``o_rdy`` after each request. It makes no explicit checks.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
    for model in (handle_cache_arrays, handle_lru_arrays, handle_writeback, handle_bus_interface):
        cocotb.start_soon(model(dut))

    async def wait_until_ready():
        # Stall on rising clock edges until the DUT reports ready.
        while not dut.o_rdy.value:
            await RisingEdge(dut.i_clk)

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_rdy)

    INDEX = 7
    TAG = 0xabcd

    # unused tag
    dut.i_cpu_tag.value = 0xffff
    dut.i_rdy.value = 1

    # Write 0x41 at offset 1.
    dut.i_cpu_we.value = 1
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 1
    dut.i_cpu_data.value = 0x41
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = TAG

    await wait_until_ready()

    # Read the same location back.
    dut.i_cpu_we.value = 0
    dut.i_cpu_index.value = INDEX
    dut.i_cpu_offset.value = 1
    await RisingEdge(dut.i_clk)
    dut.i_cpu_tag.value = TAG

    await wait_until_ready()

    dut.i_cpu_we.value = 0

    await Timer(1, "us")
|
||||
34
sim/application_wrapper/cache/application_wrapper_cache_top_test.py
vendored
Normal file
34
sim/application_wrapper/cache/application_wrapper_cache_top_test.py
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
import cocotb
|
||||
from cocotb.handle import Immediate
|
||||
|
||||
|
||||
from cocotb.clock import Clock
|
||||
from cocotb.triggers import Timer, RisingEdge
|
||||
|
||||
|
||||
import logging
|
||||
|
||||
import random
|
||||
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
CLK_PERIOD = 5
|
||||
|
||||
@cocotb.test
async def test_sanity(dut):
    """Bring the cache top level out of reset and let it run.

    Starts the clock, holds reset for ten rising edges, waits for
    ``o_cpu_rdy`` to rise and then runs for another 10 us. No requests are
    issued and nothing is checked.
    """
    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())

    dut.i_cpu_we.value = 0

    # Hold reset for ten rising clock edges.
    dut.i_rst.value = Immediate(1)
    remaining = 10
    while remaining:
        await RisingEdge(dut.i_clk)
        remaining -= 1
    dut.i_rst.value = 0

    await RisingEdge(dut.o_cpu_rdy)

    await Timer(10, "us")
|
||||
14
sim/application_wrapper/cache/cache.yaml
vendored
14
sim/application_wrapper/cache/cache.yaml
vendored
@@ -4,4 +4,18 @@ tests:
|
||||
modules:
|
||||
- "application_wrapper_cache_arrays_test"
|
||||
sources: "sources.list"
|
||||
waves: True
|
||||
|
||||
- name: "application_wrapper_cache_miss_handler_test"
|
||||
toplevel: "application_wrapper_cache_miss_handler"
|
||||
modules:
|
||||
- "application_wrapper_cache_miss_handler_test"
|
||||
sources: "sources.list"
|
||||
waves: True
|
||||
|
||||
- name: "application_wrapper_cache_top_test"
|
||||
toplevel: "application_wrapper_cache_top"
|
||||
modules:
|
||||
- "application_wrapper_cache_top_test"
|
||||
sources: "sources.list"
|
||||
waves: True
|
||||
74
src/application_wrapper/cache/application_wrapper_cache_bus_interface.sv
vendored
Normal file
74
src/application_wrapper/cache/application_wrapper_cache_bus_interface.sv
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
import application_wrapper_cache_pkg::*;
|
||||
|
||||
// Bus interface between the cache (CPU memory requests, snoops, writebacks)
// and a CHI-style link. The module currently declares ports only; it has no
// body yet.
module application_wrapper_cache_bus_interface #(
parameter DATA_W = 64*8,

// these are all wip
localparam REQ_W = 32,
localparam RSP_W = 32,
localparam DAT_W = 512+64,
localparam SNP_W = 32
) (
input logic i_clk,
input logic i_rst,

// Memory request from the cache and its response
input logic [31:0] i_cpu_memory_addr,
input logic i_cpu_memory_valid,
input cache_cmd_e i_cpu_memory_cmd,

output logic [DATA_W-1:0] o_cpu_memory_data,
output logic o_cpu_memory_done,
output cache_resp_e o_cpu_memory_resp,

// Snoop request towards the cache
output logic [31:0] o_snoop_addr,
output snoop_cmd_e o_snoop_cmd,
output logic o_snoop_valid,

// Writeback request from the cache
input logic [31:0] i_writeback_addr,
input logic [DATA_W-1:0] i_writeback_data,
input logic i_writeback_valid,
output logic o_writeback_done,

// CHI Interface
output logic o_txsactive,
// NOTE(review): declared as an input but carries the o_ prefix - confirm the intended name
input logic o_rxsactive,

output logic o_txlinkactivereq,
input logic i_txlinkactiveack,


// TX request channel
output logic o_txreqflitpend,
output logic o_txreqflitv,
output logic [REQ_W-1:0] o_txreqflit,
input logic i_txreqlcrdv,

// TX response channel
output logic o_txrspflitpend,
output logic o_txrspflitv,
output logic [RSP_W-1:0] o_txrspflit,
input logic i_txrsplcrdv,

// TX data channel
output logic o_txdatflitpend,
output logic o_txdatflitv,
output logic [DAT_W-1:0] o_txdatflit,
input logic i_txdatlcrdv,

input logic i_rxlinkactivereq,
output logic o_rxlinkactiveack,

// RX response channel
input logic i_rxrspflitpend,
input logic i_rxrspflitv,
input logic [RSP_W-1:0] i_rxrspflit,
// NOTE(review): declared as an output but carries the i_ prefix - confirm the intended name
output logic i_rxrsplcrdv,

// RX data channel
input logic i_rxdatflitpend,
input logic i_rxdatflitv,
input logic [DAT_W-1:0] i_rxdatflit,
output logic o_rxdatlcrdv,

// RX snoop channel
input logic i_rxsnpflitpend,
input logic i_rxsnpflitv,
input logic [SNP_W-1:0] i_rxsnpflit,
output logic o_rxsnplcrdv
);

endmodule
|
||||
31
src/application_wrapper/cache/application_wrapper_cache_lru.sv
vendored
Normal file
31
src/application_wrapper/cache/application_wrapper_cache_lru.sv
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
// Per-set replacement-state storage: one LRU_W-bit word for each of NUM_SETS
// sets, with one synchronous read port and one synchronous write port.
//
// o_read_data is registered: it is updated on the clock edge where
// i_read_valid is high and holds its previous value otherwise.
//
// When a read and a write target the same index on the same edge, the read
// returns the word being written (write-first). The original code got this
// behaviour implicitly from blocking assignments inside a clocked block, which
// races with any logic that samples o_read_data on the same edge; here the
// bypass is explicit and all state uses non-blocking assignments.
module application_wrapper_cache_lru #(
    // This should be NUM_WAYS - 1
    parameter LRU_W = 3,
    parameter NUM_SETS = 64,

    localparam INDEX_W = $clog2(NUM_SETS)
) (
    input  logic i_clk,

    // Read port (data valid the cycle after i_read_valid)
    input  logic [INDEX_W-1:0] i_read_index,
    input  logic               i_read_valid,
    output logic [LRU_W-1:0]   o_read_data,

    // Write port
    input  logic [INDEX_W-1:0] i_write_index,
    input  logic               i_write_valid,
    input  logic [LRU_W-1:0]   i_write_data
);

logic [LRU_W-1:0] lru_array [NUM_SETS];

always_ff @(posedge i_clk) begin
    if (i_write_valid) begin
        lru_array[i_write_index] <= i_write_data;
    end

    if (i_read_valid) begin
        if (i_write_valid && (i_write_index == i_read_index)) begin
            // Same-set read during write: forward the new word.
            o_read_data <= i_write_data;
        end else begin
            o_read_data <= lru_array[i_read_index];
        end
    end
end

endmodule
|
||||
477
src/application_wrapper/cache/application_wrapper_cache_miss_handler.sv
vendored
Normal file
477
src/application_wrapper/cache/application_wrapper_cache_miss_handler.sv
vendored
Normal file
@@ -0,0 +1,477 @@
|
||||
import application_wrapper_cache_pkg::*;
|
||||
|
||||
// CPU-side cache controller: performs tag lookup on the data/meta arrays,
// services hits, and walks a state machine for misses (victim selection,
// writeback or evict notification, line fill) and for write upgrades of
// lines that are not yet owned (MESI_SHARED -> CLEAN_UNIQUE request).
//
// Timing, as used by this module:
//   * The CPU presents index/offset/we/data in cycle N; the array read is
//     issued in that cycle and its result is consumed in cycle N+1, together
//     with i_cpu_tag (the physical tag arrives one cycle after the index).
//   * A metadata word is {mesi, tag} (2 + TAG_W bits).
//   * o_rdy low stalls the CPU; no array/LRU read is issued while it is low.
//
// NOTE: the replacement logic (plru_touch and the victim selection in
// CHECK_VICTIM) is a 3-bit tree pseudo-LRU and is only correct for
// NUM_WAYS == 4; way indices are also cast with 2'(...).
module application_wrapper_cache_miss_handler #(
    parameter NUM_WAYS = 4,
    parameter NUM_SETS = 64,

    localparam CPU_W    = 8,
    localparam DATA_W   = 64*8,
    localparam OFFSET_W = 6,
    localparam INDEX_W  = $clog2(NUM_SETS),
    localparam TAG_W    = 32 - INDEX_W - OFFSET_W,
    localparam LRU_W    = NUM_WAYS-1,

    localparam META_W   = TAG_W + 2
) (
    input  logic i_clk,
    input  logic i_rst,

    // NOTE: tag is physical tag, expected 1 cycle after the index and the offset
    input  logic [TAG_W-1:0]    i_cpu_tag,
    input  logic [INDEX_W-1:0]  i_cpu_index,
    input  logic [OFFSET_W-1:0] i_cpu_offset,

    // Handshake: i_rdy qualifies the incoming request, o_rdy stalls the CPU
    input  logic i_rdy,
    output logic o_rdy,

    input  logic i_cpu_we,

    input  logic [CPU_W-1:0] i_cpu_data,
    output logic [CPU_W-1:0] o_cpu_data,

    // Data/meta array read port (all ways of one set)
    output logic [INDEX_W-1:0] o_read_index,
    output logic               o_read_valid,

    input  logic [DATA_W-1:0] i_read_data [NUM_WAYS],
    input  logic [META_W-1:0] i_read_meta [NUM_WAYS],

    // Data/meta array write port (one write-enable bit per way)
    output logic [INDEX_W-1:0]  o_write_index,
    output logic [NUM_WAYS-1:0] o_write_valid,

    output logic [DATA_W-1:0] o_write_data,
    output logic [META_W-1:0] o_write_meta,

    // LRU array read port
    output logic [INDEX_W-1:0] o_lru_read_index,
    output logic               o_lru_read_valid,
    input  logic [LRU_W-1:0]   i_lru_read_data,

    // LRU array write port
    output logic [INDEX_W-1:0] o_lru_write_index,
    output logic               o_lru_write_valid,
    output logic [LRU_W-1:0]   o_lru_write_data,

    // Dirty-line writeback request
    output logic [DATA_W-1:0] o_writeback_data,
    output logic [31:0]       o_writeback_addr,
    output logic              o_writeback_valid,
    input  logic              i_writeback_done,

    // Memory / coherency command request
    output logic [31:0] o_memory_addr,
    output logic        o_memory_valid,
    output cache_cmd_e  o_memory_cmd,

    // Memory / coherency response
    input  logic [DATA_W-1:0] i_memory_data,
    input  logic              i_memory_done,
    input  cache_resp_e       i_memory_resp
);

// WRITEBACK is declared but never entered; it is kept so that the encoding
// of the remaining states is unchanged.
enum logic [3:0] {
    RESET,
    CLEAR_MEMORY,
    IDLE,
    CHECK_VICTIM,
    WRITEBACK,
    WAIT_WRITEBACK_ACK,
    REQUEST_MEMORY,
    WAIT_MEMORY,
    REQUEST_OWNERSHIP
} state, state_next;

// Set counter used while invalidating every set after reset
logic [INDEX_W-1:0] clear_index, clear_index_next;

// CPU request delayed so it lines up with the array read data / tag
logic             cpu_we_d1;
logic [CPU_W-1:0] cpu_i_data_d1;

logic [TAG_W-1:0]    cpu_tag_d1;
logic [INDEX_W-1:0]  cpu_index_d1, cpu_index_d2;
logic [OFFSET_W-1:0] cpu_offset_d1, cpu_offset_d2;

// Request captured for the duration of a miss / ownership upgrade
logic [TAG_W-1:0]            cpu_tag_new, cpu_tag_new_next;
logic [INDEX_W-1:0]          cpu_index_new, cpu_index_new_next;
logic [OFFSET_W-1:0]         cpu_offset_new, cpu_offset_new_next;
logic [$clog2(NUM_WAYS)-1:0] cpu_way_new, cpu_way_new_next;
logic [7:0]                  cpu_data_new, cpu_data_new_next;
logic                        cpu_we_new, cpu_we_new_next;

logic previous_was_valid, previous_was_valid_next;

// Lookup scratch values (purely combinational)
logic                        way_match_found;
logic [NUM_WAYS-1:0]         way_select_mask;
logic [$clog2(NUM_WAYS)-1:0] way_select_idx;
mesi_e                       mesi;
logic [TAG_W-1:0]            tag;

// Line address of the pending fill request
logic [31:0] read_req_addr, read_req_addr_next;

// Return the tree pseudo-LRU word after an access to `way`.
//   bit 0 : 1 -> next victim is in ways {2,3}, 0 -> in ways {0,1}
//   bit 1 : within {0,1}, 1 -> way 1 is the victim, 0 -> way 0
//   bit 2 : within {2,3}, 1 -> way 3 is the victim, 0 -> way 2
// Each access points the bits on its path away from the accessed way and
// leaves the other subtree's bit untouched.
function automatic logic [LRU_W-1:0] plru_touch(
    input logic [LRU_W-1:0]            lru,
    input logic [$clog2(NUM_WAYS)-1:0] way
);
    plru_touch = lru;
    case (way)
        2'd0: begin
            plru_touch[0] = 1'b1;
            plru_touch[1] = 1'b1;
        end

        2'd1: begin
            plru_touch[0] = 1'b1;
            plru_touch[1] = 1'b0;
        end

        2'd2: begin
            plru_touch[0] = 1'b0;
            plru_touch[2] = 1'b1;
        end

        2'd3: begin
            plru_touch[0] = 1'b0;
            plru_touch[2] = 1'b0;
        end

        default: begin
        end
    endcase
endfunction

always_ff @(posedge i_clk) begin
    // Only the state register is reset; everything else is initialised by
    // the RESET state or overwritten before use.
    if (i_rst) begin
        state <= RESET;
    end else begin
        state <= state_next;
    end

    previous_was_valid <= previous_was_valid_next;

    read_req_addr <= read_req_addr_next;

    cpu_offset_new <= cpu_offset_new_next;
    cpu_index_new  <= cpu_index_new_next;
    cpu_tag_new    <= cpu_tag_new_next;
    cpu_way_new    <= cpu_way_new_next;
    cpu_data_new   <= cpu_data_new_next;
    cpu_we_new     <= cpu_we_new_next;

    clear_index <= clear_index_next;

    // Free-running delay line of the CPU request
    cpu_we_d1     <= i_cpu_we;
    cpu_i_data_d1 <= i_cpu_data;
    cpu_index_d1  <= i_cpu_index;
    cpu_index_d2  <= cpu_index_d1;
    cpu_tag_d1    <= i_cpu_tag;
    cpu_offset_d1 <= i_cpu_offset;
    cpu_offset_d2 <= cpu_offset_d1;
end

always_comb begin
    // Defaults: every output idle, every register holds its value.
    o_rdy = '0;
    o_cpu_data = '0;

    o_read_valid = '0;
    o_read_index = '0;

    o_write_valid = '0;
    o_write_index = '0;
    o_write_data = '0;
    o_write_meta = '0;

    o_lru_read_valid = '0;
    o_lru_read_index = '0;
    o_lru_write_valid = '0;
    o_lru_write_index = '0;
    o_lru_write_data = '0;

    o_writeback_data = '0;
    o_writeback_addr = '0;
    o_writeback_valid = '0;

    o_memory_addr = '0;
    o_memory_valid = '0;
    o_memory_cmd = CACHE_CMD_NONE;

    way_match_found = '0;
    way_select_mask = '0;
    way_select_idx = '0;
    mesi = MESI_INVALID;
    tag = '0;

    cpu_offset_new_next = cpu_offset_new;
    cpu_index_new_next = cpu_index_new;
    cpu_tag_new_next = cpu_tag_new;
    cpu_way_new_next = cpu_way_new;
    cpu_data_new_next = cpu_data_new;
    cpu_we_new_next = cpu_we_new;

    read_req_addr_next = read_req_addr;

    clear_index_next = clear_index;

    previous_was_valid_next = previous_was_valid;

    state_next = state;

    case (state)
        RESET: begin
            state_next = CLEAR_MEMORY;
            clear_index_next = '0;
            previous_was_valid_next = '0;
        end

        // Invalidate all ways of one set per cycle and zero its LRU word.
        CLEAR_MEMORY: begin
            o_write_valid = '1;
            o_write_data = '0;
            o_write_meta = {MESI_INVALID, (TAG_W)'('0)};
            o_write_index = clear_index;

            o_lru_write_index = clear_index;
            o_lru_write_data = '0;
            o_lru_write_valid = '1;

            // Done once the counter wraps back to 0.
            clear_index_next = clear_index + 1;
            if (clear_index_next == '0) begin
                state_next = IDLE;
            end
        end

        IDLE: begin
            // by default, o_rdy is 1 unless something is wrong
            o_rdy = '1;

            if (previous_was_valid) begin
                // data from previous cycle that was read from arrays
                way_match_found = '0;
                way_select_mask = '0;
                for (int i = 0; i < NUM_WAYS; i++) begin
                    {mesi, tag} = i_read_meta[i];
                    if (tag == i_cpu_tag && mesi != MESI_INVALID) begin
                        way_match_found = '1;
                        way_select_mask[i] = '1;
                        way_select_idx = 2'(i);
                        // mesi/tag keep the values of the matching way
                        break;
                    end
                end

                // We have a match, so either read or write data
                if (way_match_found) begin
                    if (cpu_we_d1) begin
                        // write data back to the cache array
                        // check if we are in the M or E states before we write.
                        // If we are in S then we need to request ownership before
                        // we can modify it.
                        if (mesi == MESI_MODIFIED || mesi == MESI_EXCLUSIVE) begin
                            o_write_data = i_read_data[way_select_idx];
                            o_write_data[cpu_offset_d1*8 +: CPU_W] = cpu_i_data_d1;
                            o_write_meta = {MESI_MODIFIED, i_cpu_tag};
                            o_write_valid = way_select_mask;
                            o_write_index = cpu_index_d1;
                        end else begin
                            o_rdy = '0;

                            o_memory_addr = {i_cpu_tag, cpu_index_d1, (OFFSET_W)'('0)};
                            o_memory_cmd = CACHE_CMD_CLEAN_UNIQUE;
                            o_memory_valid = '1;

                            // Remember the write until ownership is granted.
                            cpu_offset_new_next = cpu_offset_d1;
                            cpu_index_new_next = cpu_index_d1;
                            cpu_tag_new_next = i_cpu_tag;
                            cpu_way_new_next = way_select_idx;
                            cpu_data_new_next = cpu_i_data_d1;

                            state_next = REQUEST_OWNERSHIP;
                        end
                    end else begin
                        // Send the data to the CPU
                        o_cpu_data = i_read_data[way_select_idx][cpu_offset_d1*8 +: CPU_W];
                    end

                    // update lru: start from the word read for this set and
                    // point it away from the way that was just hit.
                    o_lru_write_index = cpu_index_d1;
                    o_lru_write_data = plru_touch(i_lru_read_data, way_select_idx);
                    o_lru_write_valid = '1;
                end else begin
                    // Miss: stall the CPU and capture the request.
                    o_rdy = '0;
                    state_next = CHECK_VICTIM;

                    cpu_data_new_next = cpu_i_data_d1;
                    cpu_we_new_next = cpu_we_d1;
                end
            end

            // Read from arrays (suppressed while stalling)
            o_read_index = i_cpu_index;
            o_read_valid = i_rdy & o_rdy;

            o_lru_read_index = i_cpu_index;
            o_lru_read_valid = i_rdy & o_rdy;

            previous_was_valid_next = '1;
        end

        REQUEST_OWNERSHIP: begin
            if (i_memory_done) begin
                // write to the cacheline here. The array outputs still hold
                // the set that was read for this request because no read has
                // been issued while stalled.
                o_write_data = i_read_data[cpu_way_new];
                o_write_data[cpu_offset_new*8 +: CPU_W] = cpu_data_new;
                o_write_meta = {MESI_MODIFIED, cpu_tag_new};
                o_write_valid = (1 << cpu_way_new);
                o_write_index = cpu_index_new;

                // update lru for the way that was written
                o_lru_write_index = cpu_index_new;
                o_lru_write_data = plru_touch(i_lru_read_data, cpu_way_new);
                o_lru_write_valid = '1;

                // Release the CPU and start the lookup for the request that
                // has been waiting on the bus, exactly as WAIT_MEMORY does.
                // Without this, IDLE would compare the waiting request
                // against the stale read data of the previous set.
                o_rdy = '1;

                o_read_index = i_cpu_index;
                o_read_valid = i_rdy & o_rdy;

                o_lru_read_index = i_cpu_index;
                o_lru_read_valid = i_rdy & o_rdy;

                state_next = IDLE;
            end
        end

        CHECK_VICTIM: begin
            // first use the LRU, then overwrite if there was an invalid way

            // Walk the pseudo-LRU tree using the encoding written by
            // plru_touch: bit 0 selects the pair (the way-index MSB), then
            // bit 2 or bit 1 selects the way inside that pair (the LSB).
            way_select_idx[1] = i_lru_read_data[0];
            way_select_idx[0] = way_select_idx[1] ? i_lru_read_data[2] : i_lru_read_data[1];

            for (int i = 0; i < NUM_WAYS; i++) begin
                {mesi, tag} = i_read_meta[i];
                if (mesi == MESI_INVALID) begin
                    way_select_idx = 2'(i);
                    break;
                end
            end

            {mesi, tag} = i_read_meta[way_select_idx];

            if (mesi == MESI_MODIFIED) begin
                // Dirty victim: hand the line to the writeback path.
                o_writeback_data = i_read_data[way_select_idx];
                o_writeback_addr = {tag, cpu_index_d2, (OFFSET_W)'('0)};
                o_writeback_valid = '1;
                state_next = WAIT_WRITEBACK_ACK;
            end else if (mesi == MESI_EXCLUSIVE || mesi == MESI_SHARED) begin
                // Clean victim: only send an EVICT command for it.
                o_memory_addr = {tag, cpu_index_d2, (OFFSET_W)'('0)};
                o_memory_valid = '1;
                o_memory_cmd = CACHE_CMD_EVICT;
                state_next = WAIT_WRITEBACK_ACK;
            end else begin
                state_next = REQUEST_MEMORY;
            end

            // The missing request is now two cycles old for index/offset and
            // one cycle old for the tag.
            read_req_addr_next = {cpu_tag_d1, cpu_index_d2, (OFFSET_W)'('0)};
            cpu_offset_new_next = cpu_offset_d2;
            cpu_index_new_next = cpu_index_d2;
            cpu_tag_new_next = cpu_tag_d1;
            cpu_way_new_next = way_select_idx;
        end

        WAIT_WRITEBACK_ACK: begin
            // This state is also used when sending the EVICT command,
            // before sending the read.
            if (i_writeback_done || i_memory_done) begin
                state_next = REQUEST_MEMORY;
            end
        end

        REQUEST_MEMORY: begin
            // This state can be put into WAIT_WRITEBACK_ACK and CHECK_VICTIM
            o_memory_addr = read_req_addr;
            o_memory_valid = '1;

            // A write miss needs the line with ownership.
            if (cpu_we_new) begin
                o_memory_cmd = CACHE_CMD_READ_UNIQUE;
            end else begin
                o_memory_cmd = CACHE_CMD_READ;
            end
            state_next = WAIT_MEMORY;
        end

        WAIT_MEMORY: begin
            if (i_memory_done) begin
                // Fill the chosen way with the returned line.
                o_write_valid = (1 << cpu_way_new);
                o_write_data = i_memory_data;
                o_write_index = cpu_index_new;
                if (cpu_we_new) begin
                    // Merge the pending store into the line.
                    o_write_data[cpu_offset_new*8 +: CPU_W] = cpu_data_new;
                    o_write_meta = {MESI_MODIFIED, cpu_tag_new};
                end else begin
                    // Any other response leaves o_write_meta at its default
                    // of all zeros.
                    if (i_memory_resp == CACHE_RSP_SHARED) begin
                        o_write_meta = {MESI_SHARED, cpu_tag_new};
                    end else if (i_memory_resp == CACHE_RSP_EXCLUSIVE) begin
                        o_write_meta = {MESI_EXCLUSIVE, cpu_tag_new};
                    end
                    o_cpu_data = i_memory_data[cpu_offset_new*8 +: CPU_W];
                end

                o_rdy = '1;

                // update lru for the way that was filled
                o_lru_write_index = cpu_index_new;
                o_lru_write_data = plru_touch(i_lru_read_data, cpu_way_new);
                o_lru_write_valid = '1;

                // Start the lookup for the request waiting on the bus.
                o_read_index = i_cpu_index;
                o_read_valid = i_rdy & o_rdy;

                o_lru_read_index = i_cpu_index;
                o_lru_read_valid = i_rdy & o_rdy;

                state_next = IDLE;
            end
        end

        default: begin
            state_next = IDLE;
        end

    endcase
end

endmodule
|
||||
@@ -11,18 +11,31 @@ package application_wrapper_cache_pkg;
|
||||
} page_table_entry_t;
|
||||
|
||||
typedef enum logic [2:0] {
|
||||
CACHE_NONE,
|
||||
CACHE_READ_SHARED,
|
||||
CACHE_READ_UNIQUE,
|
||||
CACHE_WRITE,
|
||||
CACHE_CLEAN_UNIQUE
|
||||
CACHE_CMD_NONE,
|
||||
CACHE_CMD_READ,
|
||||
CACHE_CMD_READ_UNIQUE,
|
||||
CACHE_CMD_WRITE,
|
||||
CACHE_CMD_CLEAN_UNIQUE,
|
||||
CACHE_CMD_EVICT
|
||||
} cache_cmd_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
MODIFIED,
|
||||
EXCLUSIVE,
|
||||
SHARED,
|
||||
INVALID
|
||||
CACHE_RSP_NONE,
|
||||
CACHE_RSP_SHARED,
|
||||
CACHE_RSP_EXCLUSIVE
|
||||
} cache_resp_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
MESI_INVALID,
|
||||
MESI_SHARED,
|
||||
MESI_EXCLUSIVE,
|
||||
MESI_MODIFIED
|
||||
} mesi_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
CACHE_SNP_NONE,
|
||||
CACHE_SNP_INVALIDATE,
|
||||
CACHE_SNP_SHARE
|
||||
} snoop_cmd_e;
|
||||
|
||||
endpackage
|
||||
40
src/application_wrapper/cache/application_wrapper_cache_snooping.sv
vendored
Normal file
40
src/application_wrapper/cache/application_wrapper_cache_snooping.sv
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
import application_wrapper_cache_pkg::*;
|
||||
|
||||
// Snoop-side cache port. This is currently a port-only stub: the module has
// no body, so none of its outputs are driven yet.
//
// The port groups mirror those of the CPU-side miss handler: an array read
// port, an array write port and a writeback request channel.
module application_wrapper_cache_snooping #(
    parameter NUM_WAYS = 4,
    parameter NUM_SETS = 64,

    localparam CPU_W    = 8,
    localparam DATA_W   = 64*8,
    localparam OFFSET_W = 6,
    localparam INDEX_W  = $clog2(NUM_SETS),
    localparam TAG_W    = 32 - INDEX_W - OFFSET_W,
    localparam LRU_W    = NUM_WAYS-1,

    localparam META_W   = TAG_W + 2
) (
    input  logic i_clk,
    input  logic i_rst,

    // Incoming snoop request
    input  logic [31:0] i_snoop_addr,
    input  snoop_cmd_e  i_snoop_cmd,

    // Array read port (all ways of one set)
    output logic [INDEX_W-1:0] o_read_index,
    output logic               o_read_valid,

    input  logic [DATA_W-1:0] i_read_data [NUM_WAYS],
    input  logic [META_W-1:0] i_read_meta [NUM_WAYS],

    // Array write port (one write-enable bit per way)
    output logic [INDEX_W-1:0]  o_write_index,
    output logic [NUM_WAYS-1:0] o_write_valid,

    output logic [DATA_W-1:0] o_write_data,
    output logic [META_W-1:0] o_write_meta,

    // Writeback request channel
    output logic [DATA_W-1:0] o_writeback_data,
    output logic [31:0]       o_writeback_addr,
    output logic              o_writeback_valid,
    input  logic              i_writeback_done
);

endmodule
|
||||
@@ -0,0 +1,366 @@
|
||||
import application_wrapper_cache_pkg::*;
|
||||
|
||||
// Top level of the CPU cache: wires the MMU, the CPU-side miss handler, the
// data/meta arrays, the LRU array, the writeback buffer and the bus interface
// together. The only logic in this module is the address split and the
// o_cpu_rdy assignment.
//
// NOTE(review): the snooping module is not instantiated here, so the
// snoop_read_*, snoop_write_* and snoop_writeback_{data,addr,valid} nets that
// feed the arrays and the writeback buffer have no driver in this module.
// NOTE(review): port o_rxsactive is an input and port i_rxrsplcrdv is an
// output; the names are kept because they are part of the module interface.
module application_wrapper_cache_top #(
    parameter NUM_WAYS = 4,
    parameter NUM_SETS = 64,

    localparam DATA_W   = 64*8,
    localparam OFFSET_W = 6,
    localparam INDEX_W  = $clog2(NUM_SETS),
    localparam TAG_W    = 32 - INDEX_W - OFFSET_W,
    localparam LRU_W    = NUM_WAYS-1,

    localparam META_W   = TAG_W + 2,

    localparam REQ_W    = 32,
    localparam RSP_W    = 32,
    localparam DAT_W    = 512+64,
    localparam SNP_W    = 32
) (
    input  logic i_clk,
    input  logic i_rst,

    // CPU Interface
    input  logic [31:0] i_cpu_addr,
    input  logic        i_cpu_we,
    input  logic        i_cpu_sync,
    input  logic [7:0]  i_cpu_data,
    output logic [7:0]  o_cpu_data,
    input  logic        i_cpu_rdy,
    output logic        o_cpu_rdy,

    // CHI Interface
    output logic o_txsactive,
    input  logic o_rxsactive,

    output logic o_txlinkactivereq,
    input  logic i_txlinkactiveack,

    // TX request channel
    output logic             o_txreqflitpend,
    output logic             o_txreqflitv,
    output logic [REQ_W-1:0] o_txreqflit,
    input  logic             i_txreqlcrdv,

    // TX response channel
    output logic             o_txrspflitpend,
    output logic             o_txrspflitv,
    output logic [RSP_W-1:0] o_txrspflit,
    input  logic             i_txrsplcrdv,

    // TX data channel
    output logic             o_txdatflitpend,
    output logic             o_txdatflitv,
    output logic [DAT_W-1:0] o_txdatflit,
    input  logic             i_txdatlcrdv,

    input  logic i_rxlinkactivereq,
    output logic o_rxlinkactiveack,

    // RX response channel
    input  logic             i_rxrspflitpend,
    input  logic             i_rxrspflitv,
    input  logic [RSP_W-1:0] i_rxrspflit,
    output logic             i_rxrsplcrdv,

    // RX data channel
    input  logic             i_rxdatflitpend,
    input  logic             i_rxdatflitv,
    input  logic [DAT_W-1:0] i_rxdatflit,
    output logic             o_rxdatlcrdv,

    // RX snoop channel
    input  logic             i_rxsnpflitpend,
    input  logic             i_rxsnpflitv,
    input  logic [SNP_W-1:0] i_rxsnpflit,
    output logic             o_rxsnplcrdv
);

// ---------------------------------------------------------------------------
// Internal nets
// ---------------------------------------------------------------------------

// MMU outputs. mmu_rdy feeds the miss handler's i_rdy; mmu_valid and
// mmu_page_table_entry are not consumed in this module.
logic mmu_rdy;
logic mmu_valid;

logic [31:0]       mmu_phys_address;
page_table_entry_t mmu_page_table_entry;

// CPU address fields
logic [TAG_W-1:0]    cpu_tag;
logic [INDEX_W-1:0]  cpu_index;
logic [OFFSET_W-1:0] cpu_offset;

logic miss_handler_rdy;

// CPU-side array read port
logic [INDEX_W-1:0] cpu_read_index;
logic               cpu_read_valid;
logic [DATA_W-1:0]  cpu_read_data [NUM_WAYS];
logic [META_W-1:0]  cpu_read_meta [NUM_WAYS];

// CPU-side array write port
logic [INDEX_W-1:0]  cpu_write_index;
logic [NUM_WAYS-1:0] cpu_write_valid;
logic [DATA_W-1:0]   cpu_write_data;
logic [META_W-1:0]   cpu_write_meta;

// Snoop-side array read port
logic [INDEX_W-1:0] snoop_read_index;
logic               snoop_read_valid;
logic [DATA_W-1:0]  snoop_read_data [NUM_WAYS];
logic [META_W-1:0]  snoop_read_meta [NUM_WAYS];

// Snoop-side array write port
logic [INDEX_W-1:0]  snoop_write_index;
logic [NUM_WAYS-1:0] snoop_write_valid;
logic [DATA_W-1:0]   snoop_write_data;
logic [META_W-1:0]   snoop_write_meta;

// LRU array ports. Only the CPU side has access; whether the snoop unit
// should also modify the LRU is an open question (probably not).
logic [INDEX_W-1:0] cpu_lru_read_index;
logic               cpu_lru_read_valid;
logic [LRU_W-1:0]   cpu_lru_read_data;

logic [INDEX_W-1:0] cpu_lru_write_index;
logic               cpu_lru_write_valid;
logic [LRU_W-1:0]   cpu_lru_write_data;

// Writeback channels: CPU side, snoop side, and the merged bus side
logic [DATA_W-1:0] cpu_writeback_data;
logic [31:0]       cpu_writeback_addr;
logic              cpu_writeback_valid;
logic              cpu_writeback_done;

logic [DATA_W-1:0] snoop_writeback_data;
logic [31:0]       snoop_writeback_addr;
logic              snoop_writeback_valid;
logic              snoop_writeback_done;

logic [DATA_W-1:0] bus_writeback_data;
logic [31:0]       bus_writeback_addr;
logic              bus_writeback_valid;
logic              bus_writeback_done;

// CPU-side memory command and response
logic [31:0] cpu_memory_addr;
logic        cpu_memory_valid;
cache_cmd_e  cpu_memory_cmd;

logic [DATA_W-1:0] cpu_memory_data;
logic              cpu_memory_done;
cache_resp_e       cpu_memory_resp;

// Snoop request decoded by the bus interface
logic [31:0] snoop_addr;
snoop_cmd_e  snoop_cmd;
logic        snoop_valid;

// Snoop-side memory command and response (not connected to anything yet)
logic [31:0] snoop_memory_addr;
logic        snoop_memory_valid;
cache_cmd_e  snoop_memory_cmd;

logic [DATA_W-1:0] snoop_memory_data;
logic              snoop_memory_done;
cache_resp_e       snoop_memory_resp;

// ---------------------------------------------------------------------------
// Address split and ready
// ---------------------------------------------------------------------------

// there should be a bypass path here

// The tag comes from the MMU's physical address, while index and offset are
// taken straight from the CPU address, so the tag trails them (the miss
// handler expects it one cycle after the index and the offset).
assign cpu_tag    = mmu_phys_address[31:INDEX_W+OFFSET_W];

assign cpu_index  = i_cpu_addr[INDEX_W+OFFSET_W-1:OFFSET_W];
assign cpu_offset = i_cpu_addr[OFFSET_W-1:0];

assign o_cpu_rdy = miss_handler_rdy;

// ---------------------------------------------------------------------------
// Submodules
// ---------------------------------------------------------------------------

application_wrapper_mmu #(
    .TLB_COUNT      (32),
    .ADDR_WIDTH     (32),
    .LOG2_PAGE_SIZE (12)
) u_mmu (
    .i_clk          (i_clk),
    .i_rst          (i_rst),

    .i_cpu_addr     (i_cpu_addr),
    .i_rdy          (i_cpu_rdy | o_cpu_rdy),
    .o_rdy          (mmu_rdy),

    .o_phys_address (mmu_phys_address),
    .o_table_entry  (mmu_page_table_entry),
    .o_mmu_valid    (mmu_valid)
);

application_wrapper_cache_miss_handler #(
    .NUM_WAYS (NUM_WAYS),
    .NUM_SETS (NUM_SETS)
) u_miss_handler (
    .i_clk             (i_clk),
    .i_rst             (i_rst),

    .i_cpu_tag         (cpu_tag),
    .i_cpu_index       (cpu_index),
    .i_cpu_offset      (cpu_offset),

    .i_rdy             (mmu_rdy),
    .o_rdy             (miss_handler_rdy),

    .i_cpu_we          (i_cpu_we),

    .i_cpu_data        (i_cpu_data),
    .o_cpu_data        (o_cpu_data),

    .o_read_index      (cpu_read_index),
    .o_read_valid      (cpu_read_valid),
    .i_read_data       (cpu_read_data),
    .i_read_meta       (cpu_read_meta),

    .o_write_index     (cpu_write_index),
    .o_write_valid     (cpu_write_valid),
    .o_write_data      (cpu_write_data),
    .o_write_meta      (cpu_write_meta),

    .o_lru_read_index  (cpu_lru_read_index),
    .o_lru_read_valid  (cpu_lru_read_valid),
    .i_lru_read_data   (cpu_lru_read_data),

    .o_lru_write_index (cpu_lru_write_index),
    .o_lru_write_valid (cpu_lru_write_valid),
    .o_lru_write_data  (cpu_lru_write_data),

    .o_writeback_data  (cpu_writeback_data),
    .o_writeback_addr  (cpu_writeback_addr),
    .o_writeback_valid (cpu_writeback_valid),
    .i_writeback_done  (cpu_writeback_done),

    .o_memory_addr     (cpu_memory_addr),
    .o_memory_valid    (cpu_memory_valid),
    .o_memory_cmd      (cpu_memory_cmd),

    .i_memory_data     (cpu_memory_data),
    .i_memory_done     (cpu_memory_done),
    .i_memory_resp     (cpu_memory_resp)
);

application_wrapper_cache_arrays #(
    .NUM_WAYS (NUM_WAYS),
    .NUM_SETS (NUM_SETS)
) u_cache_arrays (
    .i_clk               (i_clk),

    .i_cpu_read_index    (cpu_read_index),
    .i_cpu_read_valid    (cpu_read_valid),

    .o_cpu_read_data     (cpu_read_data),
    .o_cpu_read_meta     (cpu_read_meta),

    .i_cpu_write_index   (cpu_write_index),
    .i_cpu_write_valid   (cpu_write_valid),

    .i_cpu_write_data    (cpu_write_data),
    .i_cpu_write_meta    (cpu_write_meta),

    .i_snoop_read_index  (snoop_read_index),
    .i_snoop_read_valid  (snoop_read_valid),

    .o_snoop_read_data   (snoop_read_data),
    .o_snoop_read_meta   (snoop_read_meta),

    .i_snoop_write_index (snoop_write_index),
    .i_snoop_write_valid (snoop_write_valid),

    .i_snoop_write_data  (snoop_write_data),
    .i_snoop_write_meta  (snoop_write_meta)
);

application_wrapper_cache_lru #(
    .LRU_W    (LRU_W),
    .NUM_SETS (NUM_SETS)
) u_lru (
    .i_clk         (i_clk),

    .i_read_index  (cpu_lru_read_index),
    .i_read_valid  (cpu_lru_read_valid),
    .o_read_data   (cpu_lru_read_data),

    .i_write_index (cpu_lru_write_index),
    .i_write_valid (cpu_lru_write_valid),
    .i_write_data  (cpu_lru_write_data)
);

application_wrapper_cache_writeback_buffer #(
    .DATA_W (DATA_W)
) u_writeback_buffer (
    .i_clk                   (i_clk),
    .i_rst                   (i_rst),

    .i_cpu_writeback_data    (cpu_writeback_data),
    .i_cpu_writeback_addr    (cpu_writeback_addr),
    .i_cpu_writeback_valid   (cpu_writeback_valid),
    .o_cpu_writeback_done    (cpu_writeback_done),

    .i_snoop_writeback_data  (snoop_writeback_data),
    .i_snoop_writeback_addr  (snoop_writeback_addr),
    .i_snoop_writeback_valid (snoop_writeback_valid),
    .o_snoop_writeback_done  (snoop_writeback_done),

    .o_bus_writeback_data    (bus_writeback_data),
    .o_bus_writeback_addr    (bus_writeback_addr),
    .o_bus_writeback_valid   (bus_writeback_valid),
    .i_bus_writeback_done    (bus_writeback_done)
);

application_wrapper_cache_bus_interface #(
    .DATA_W (DATA_W)
) u_bus_interface (
    .i_clk              (i_clk),
    .i_rst              (i_rst),

    .i_cpu_memory_addr  (cpu_memory_addr),
    .i_cpu_memory_valid (cpu_memory_valid),
    .i_cpu_memory_cmd   (cpu_memory_cmd),

    .o_cpu_memory_data  (cpu_memory_data),
    .o_cpu_memory_done  (cpu_memory_done),
    .o_cpu_memory_resp  (cpu_memory_resp),

    .o_snoop_addr       (snoop_addr),
    .o_snoop_cmd        (snoop_cmd),
    .o_snoop_valid      (snoop_valid),

    .i_writeback_addr   (bus_writeback_addr),
    .i_writeback_data   (bus_writeback_data),
    .i_writeback_valid  (bus_writeback_valid),
    .o_writeback_done   (bus_writeback_done),

    .o_txsactive        (o_txsactive),
    .o_rxsactive        (o_rxsactive),

    .o_txlinkactivereq  (o_txlinkactivereq),
    .i_txlinkactiveack  (i_txlinkactiveack),

    .o_txreqflitpend    (o_txreqflitpend),
    .o_txreqflitv       (o_txreqflitv),
    .o_txreqflit        (o_txreqflit),
    .i_txreqlcrdv       (i_txreqlcrdv),

    .o_txrspflitpend    (o_txrspflitpend),
    .o_txrspflitv       (o_txrspflitv),
    .o_txrspflit        (o_txrspflit),
    .i_txrsplcrdv       (i_txrsplcrdv),

    .o_txdatflitpend    (o_txdatflitpend),
    .o_txdatflitv       (o_txdatflitv),
    .o_txdatflit        (o_txdatflit),
    .i_txdatlcrdv       (i_txdatlcrdv),

    .i_rxlinkactivereq  (i_rxlinkactivereq),
    .o_rxlinkactiveack  (o_rxlinkactiveack),

    .i_rxrspflitpend    (i_rxrspflitpend),
    .i_rxrspflitv       (i_rxrspflitv),
    .i_rxrspflit        (i_rxrspflit),
    .i_rxrsplcrdv       (i_rxrsplcrdv),

    .i_rxdatflitpend    (i_rxdatflitpend),
    .i_rxdatflitv       (i_rxdatflitv),
    .i_rxdatflit        (i_rxdatflit),
    .o_rxdatlcrdv       (o_rxdatlcrdv),

    .i_rxsnpflitpend    (i_rxsnpflitpend),
    .i_rxsnpflitv       (i_rxsnpflitv),
    .i_rxsnpflit        (i_rxsnpflit),
    .o_rxsnplcrdv       (o_rxsnplcrdv)
);

endmodule
|
||||
23
src/application_wrapper/cache/application_wrapper_cache_writeback_buffer.sv
vendored
Normal file
23
src/application_wrapper/cache/application_wrapper_cache_writeback_buffer.sv
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
// Writeback buffer between the two cache-side writeback requesters (CPU side
// and snoop side) and the single bus-side writeback channel. This is
// currently a port-only stub: the module has no body, so none of its outputs
// are driven yet.
//
// Each channel carries a DATA_W-bit line, a 32-bit address, a valid request
// and a done acknowledgement in the opposite direction.
module application_wrapper_cache_writeback_buffer #(
    parameter DATA_W = 64*8
) (
    input  logic i_clk,
    input  logic i_rst,

    // CPU-side requester
    input  logic [DATA_W-1:0] i_cpu_writeback_data,
    input  logic [31:0]       i_cpu_writeback_addr,
    input  logic              i_cpu_writeback_valid,
    output logic              o_cpu_writeback_done,

    // Snoop-side requester
    input  logic [DATA_W-1:0] i_snoop_writeback_data,
    input  logic [31:0]       i_snoop_writeback_addr,
    input  logic              i_snoop_writeback_valid,
    output logic              o_snoop_writeback_done,

    // Bus-side channel
    output logic [DATA_W-1:0] o_bus_writeback_data,
    output logic [31:0]       o_bus_writeback_addr,
    output logic              o_bus_writeback_valid,
    input  logic              i_bus_writeback_done
);

endmodule
|
||||
@@ -9,13 +9,27 @@ module application_wrapper_mmu #(
|
||||
input logic i_rst,
|
||||
|
||||
input logic [ADDR_WIDTH-1:0] i_cpu_addr,
|
||||
input i_we,
|
||||
input i_rdy,
|
||||
input o_rdy,
|
||||
input logic i_rdy,
|
||||
output logic o_rdy,
|
||||
|
||||
output logic [ADDR_WIDTH-1:0] o_phys_address,
|
||||
output page_table_entry_t o_table_entry,
|
||||
output logic o_mmu_valid
|
||||
);
|
||||
|
||||
assign o_rdy = '1;
|
||||
|
||||
always @(posedge i_clk) begin
|
||||
o_mmu_valid <= i_rdy;
|
||||
o_phys_address <= i_cpu_addr;
|
||||
|
||||
o_table_entry.cache_disable <= '0;
|
||||
o_table_entry.read_eanble <= '1;
|
||||
o_table_entry.write_enable <= '1;
|
||||
o_table_entry.execute_enable <= '1;
|
||||
o_table_entry.supervisor <= '1;
|
||||
o_table_entry.present <= '1;
|
||||
o_table_entry.write_through <= '0;
|
||||
end
|
||||
|
||||
endmodule
|
||||
52
src/application_wrapper/cache/design_doc.md
vendored
Normal file
52
src/application_wrapper/cache/design_doc.md
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
# Top level requirements
|
||||
|
||||
1. Parameterizable cache size (default 64)
|
||||
2. Non-parameterizable cacheline width (static 64)
|
||||
3. Single cycle access required for use with 6502.
|
||||
4. Support the MESI cache coherency protocol (Illinois)
|
||||
5. Direct mapped cache for low latency
|
||||
6. Interface with other coherent components
|
||||
|
||||
## Single Cycle Access
|
||||
|
||||
For reads, the 6502 presents an address on the address bus and expects data on
|
||||
the din bus on the very next cycle. Because the 6502 has so few registers, it
|
||||
is critical for memory to have as little latency as possible. For this reason,
|
||||
the cache is also direct mapped. Future versions could make this associative,
|
||||
but not right now.
|
||||
|
||||
## MESI Cache Coherency
|
||||
|
||||
Cachelines must support the Illinois protocol. Compatibility with CHI is shown
|
||||
below:
|
||||
|
||||
MODIFIED: when this line is evicted, we write the cacheline to main memory
|
||||
|
||||
EXCLUSIVE: We can write to this cacheline without informing the other caches,
|
||||
since we are the only ones with this cacheline
|
||||
|
||||
SHARED: If we write to this cacheline, we must notify the coherency controller
|
||||
so that it can invalidate all other copies of the cacheline
|
||||
|
||||
INVALID: If we read from a cacheline, send out a request to the coherency
|
||||
controller to see if anybody else has a copy. If they do, then read the
|
||||
data from the other cache. Otherwise, read from main memory.
|
||||
If we write to this cacheline, send a request to the coherency controller
|
||||
to see if anybody else has this cacheline. If they have it, then read the
|
||||
data from the cache which has it (the first one if multiple do) and invalidate
|
||||
the entries in all caches which have it.
|
||||
|
||||
## Interface
|
||||
|
||||
The interface is based off of the CHI interface, with 4 separate physical
|
||||
interfaces: REQ, RSP, DAT, SNP.
|
||||
|
||||
The CPU cache will send out commands on the REQ interface. The coherency
|
||||
controller will also send out commands on the REQ interface. The response
|
||||
channel contains the response to the request, not the data. Data comes on
|
||||
the DAT interface. SNP requests are sent on the SNP interface, and responses
|
||||
and data come back on the same interfaces. Like CHI, if the response has
|
||||
data then the response and data both come back on the DAT interface, if the
|
||||
response has no data then it just comes back on the RSP interface.
|
||||
|
||||
SNP requests only come from the coherency controller.
|
||||
@@ -1,6 +1,11 @@
|
||||
cache/application_wrapper_cache_pkg.sv
|
||||
cache/application_wrapper_cache_arrays.sv
|
||||
cache/application_wrapper_mmu.sv
|
||||
cache/application_wrapper_cache_bus_interface.sv
|
||||
cache/application_wrapper_cache_lru.sv
|
||||
cache/application_wrapper_cache_miss_handler.sv
|
||||
cache/application_wrapper_cache_snooping.sv
|
||||
cache/application_wrapper_cache_top.sv
|
||||
cache/application_wrapper_cache_writeback_buffer.sv
|
||||
cache/application_wrapper_mmu.sv
|
||||
|
||||
application_wrapper_top.sv
|
||||
Reference in New Issue
Block a user