From 8fd83c2563d00397e59d19115a3d8649dc514e72 Mon Sep 17 00:00:00 2001 From: Byron Lathi Date: Tue, 19 May 2026 19:57:15 -0700 Subject: [PATCH] Get it to kinda work --- .../application_wrapper_cache_l1_test.py | 478 +++++++++++++++++- .../cache/application_wrapper_cache_l1.sv | 320 ++++++++++-- .../cache/application_wrapper_cache_pkg.sv | 6 + 3 files changed, 771 insertions(+), 33 deletions(-) diff --git a/sim/application_wrapper/cache/application_wrapper_cache_l1_test.py b/sim/application_wrapper/cache/application_wrapper_cache_l1_test.py index 68d69f4..9911be6 100644 --- a/sim/application_wrapper/cache/application_wrapper_cache_l1_test.py +++ b/sim/application_wrapper/cache/application_wrapper_cache_l1_test.py @@ -1,19 +1,493 @@ import cocotb -from cocotb.handle import Immediate +from cocotb.handle import Immediate, LogicArray + +from cocotb.simulator import get_sim_time from cocotb.clock import Clock from cocotb.triggers import Timer, RisingEdge, FallingEdge, with_timeout +from enum import IntEnum + +from collections import defaultdict +from collections.abc import Mapping + +import logging + +import random + + +logger = logging.getLogger() + +logger.setLevel(logging.INFO) CLK_PERIOD = 5 +reference_cache_data = defaultdict(bytearray) + +higher_cache_data = defaultdict(bytearray) + +async def cpu_sequencer(dut, sequence: Mapping[int, int, bool, bool]): + + + addr, do, we, sync = sequence[0] + + dut.i_addr.value = addr + dut.i_data.value = do + dut.i_we.value = we + dut.i_sync.value = sync + + await FallingEdge(dut.i_rst) + + index = 1 + + while index < len(sequence): + await RisingEdge(dut.i_clk) + if not dut.o_rdy.value: + continue + + addr, do, we, sync = sequence[index] + + dut.i_addr.value = addr + dut.i_data.value = do + dut.i_we.value = we + dut.i_sync.value = sync + + index += 1 + + await Timer(150, "ns") + +async def cpu_data_monitor(dut): + previous_address = 0 + address = 0 + + we = 0 + previous_we = 0 + + i_data = 0 + previous_i_data = 0 + + await 
FallingEdge(dut.i_rst) + + while True: + await RisingEdge(dut.i_clk) + if not dut.o_rdy.value: + continue + + previous_address = address + previous_we = we + address = int(dut.i_addr.value) + we = int(dut.i_we.value) + + previous_i_data = i_data + i_data = int(dut.i_data.value) + + data = int(dut.o_data.value) + + if previous_address == 0: + continue + + # don't care if it was a write + if previous_we: + + index = (previous_address // 64) % 64 + offset = previous_address % 64 + + cacheline = reference_cache_data[index] + + cacheline[offset] = previous_i_data + logger.debug(f"We saw a write here {index=} {offset=} previous_data={previous_i_data:x}") + else: + index = (previous_address // 64) % 64 + offset = previous_address % 64 + + cacheline = reference_cache_data[index] + + expected_data = cacheline[offset] + + if (data != expected_data): + logger.error(f"{get_sim_time()} {address=:x} {previous_address=:x} {data=:x} {expected_data=:x}") + + + +async def mmu_sequencer(dut): + while True: + await RisingEdge(dut.i_clk) + dut.i_phys_address.value = dut.i_addr.value + +async def handle_higher_level_cache(dut): + dut.i_cache_rdy.value = 0 + + class CacheCmd(IntEnum): + CACHE_NONE = 0 + CACHE_READ = 1 + CACHE_WRITE = 2 + + while True: + await RisingEdge(dut.i_clk) + dut.i_cache_rdy.value = 0 + + if not dut.o_cache_valid.value: + continue + + cmd = CacheCmd(dut.o_cache_cmd.value) + addr = int(dut.o_cache_addr.value) + + logger.debug(f"{cmd=} {addr=}") + + + if cmd == CacheCmd.CACHE_READ: + + if addr not in higher_cache_data: + data = bytearray(random.randbytes(64)) + higher_cache_data[addr] = data + + dut.i_cache_data.value = LogicArray.from_bytes(higher_cache_data[addr] , byteorder="little") + + dut.i_cache_rdy.value = 1 + + reference_cache_data[int(dut.read_index.value)] = higher_cache_data[addr] + + await RisingEdge(dut.i_clk) + + dut.i_cache_rdy.value = 0 + + elif cmd == CacheCmd.CACHE_WRITE: + + dut.i_cache_rdy.value = 1 + + data = 
dut.o_cache_data.value.to_bytes(byteorder="little") + + higher_cache_data[addr] = bytearray(data) + + await RisingEdge(dut.i_clk) + + dut.i_cache_rdy.value = 0 + @cocotb.test async def sanity_test(dut): + expected_cache_misses = 0 + expected_evictions = 0 + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + cpu_sequence = [ + (0x100, 0xaa, True, False), + (0x101, 0xbb, True, False), + (0x100, 0x00, False, False), + (0x101, 0x00, False, False), + (0x200, 0xcc, True, False), + (0x201, 0xdd, True, False), + (0x100, 0x00, False, False), + (0x101, 0x00, False, False), + (0x200, 0x00, False, False), + (0x201, 0x00, False, False), + (0x100, 0x11, True, False), + (0x101, 0x22, True, False), + (0x100, 0x00, False, False), + (0x200, 0x33, True, False), + (0x101, 0x00, False, False), + (0x201, 0x44, True, False), + (0x100, 0x00, False, False), + (0x200, 0x00, False, False), + (0x101, 0x00, False, False), + (0x201, 0x00, False, False), + ] dut.i_rst.value = Immediate(1) for _ in range(10): await RisingEdge(dut.i_clk) dut.i_rst.value = 0 - await Timer(1, "us") \ No newline at end of file + await cpu_sequencer(dut, cpu_sequence) + + + expected_cache_misses = 2 + expected_evictions = 0 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! 
Expected {expected_cache_misses}, saw {dut_misses}") + + +@cocotb.test +async def clean_evict_test(dut): + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + # Read from one cacheline, then read from an aliased cacheline without writing. + # cacheline should be overwritten without evicting + cpu_sequence = [ + (0x100, 0x00, False, False), + (0x1100, 0x00, False, False), + ] + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + expected_cache_misses = 2 + expected_evictions = 0 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}") + +@cocotb.test +async def dirty_evict_test(dut): + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + # Read from one cacheline, then read from an aliased cacheline without writing. 
+ # cacheline should be overwritten without evicting + cpu_sequence = [ + (0x100, 0x41, True, False), + (0x101, 0x42, True, False), + (0x1100, 0x00, False, False), + (0x1100, 0xaa, True, False), + (0x100, 0x00, False, False) + ] + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + expected_cache_misses = 3 + expected_evictions = 2 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}") + + +@cocotb.test +async def long_write_thrash_test(dut): + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + num_lines_read = 2**20//64 + + cpu_sequence = [ + (i*64, i % 256, True, False) + for i in range(num_lines_read)] + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + # The last 64 lines aren't evicted + expected_cache_misses = num_lines_read + expected_evictions = num_lines_read - 64 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! 
Expected {expected_cache_misses}, saw {dut_misses}") + + +@cocotb.test +async def long_write_read_thrash_test(dut): + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + num_lines_read = 2**20//64 + + + cpu_sequence = [ + (i*64, i % 256, True, False) + for i in range(num_lines_read)] + + cpu_sequence.extend([ + (i*64, 0, False, False) + for i in range(num_lines_read)]) + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + expected_cache_misses = num_lines_read * 2 + expected_evictions = num_lines_read + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}") + + +@cocotb.test +async def long_write_linear_test(dut): + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + num_bytes_read = 2**16 + + cpu_sequence = [ + (i, i % 256, True, False) + for i in range(num_bytes_read)] + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + expected_cache_misses = num_bytes_read // 64 + expected_evictions = num_bytes_read//64 - 64 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! 
Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! Expected {expected_cache_misses}, saw {dut_misses}") + + +@cocotb.test +async def long_write_read_linear_test(dut): + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + num_bytes_read = 2**16 + + + cpu_sequence = [ + (i, i % 256, True, False) + for i in range(num_bytes_read)] + + cpu_sequence.extend([ + (i, 0, False, False) + for i in range(num_bytes_read)]) + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + expected_cache_misses = (num_bytes_read // 64) * 2 + expected_evictions = num_bytes_read // 64 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! 
Expected {expected_cache_misses}, saw {dut_misses}") + +@cocotb.test +async def short_write_read_linear_test(dut): + # What makes this test "short" is that we read 64 cachelines, + # so we shouldn't have to make any evictions + # TODO add number of evictions and cachlines loaded as performance counteres + + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + num_bytes_read = 64*64 + + cpu_sequence = [ + (i, i % 256, True, False) + for i in range(num_bytes_read)] # 64 bytes times 64 cachelines + + cpu_sequence.extend([ + (i, i % 256, False, False) + for i in range(num_bytes_read)]) # 64 bytes times 64 cachelines + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) + + expected_cache_misses = num_bytes_read//64 + expected_evictions = num_bytes_read//64 - 64 + + dut_evictions = int(dut.eviction_count.value) + dut_misses = int(dut.cache_miss_count.value) + + if dut_evictions != expected_evictions: + logger.error(f"Eviction count mismatch! Expected {expected_evictions}, saw {dut_evictions}") + + if dut_misses != expected_cache_misses: + logger.error(f"Miss count mismatch! 
Expected {expected_cache_misses}, saw {dut_misses}") + +@cocotb.test +async def random_access_test(dut): + # Just fully random accesses + # This is also kind of a thrash test since this is not realistic + + cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(mmu_sequencer(dut)) + cocotb.start_soon(handle_higher_level_cache(dut)) + cocotb.start_soon(cpu_data_monitor(dut)) + + num_bytes_read = 2**18 + + cpu_sequence = [ + (random.randint(0, 2**32), random.randint(0, 255), random.randint(0,1), random.randint(0,1)) + for _ in range(num_bytes_read)] # 64 bytes times 64 cachelines + + dut.i_rst.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.i_clk) + dut.i_rst.value = 0 + + await cpu_sequencer(dut, cpu_sequence) \ No newline at end of file diff --git a/src/application_wrapper/cache/application_wrapper_cache_l1.sv b/src/application_wrapper/cache/application_wrapper_cache_l1.sv index e156738..5820197 100644 --- a/src/application_wrapper/cache/application_wrapper_cache_l1.sv +++ b/src/application_wrapper/cache/application_wrapper_cache_l1.sv @@ -5,35 +5,36 @@ module application_wrapper_cache_l1 #( parameter CACHELINE_COUNT = 64, localparam ADDR_WIDTH = 32 )( - input logic i_clk, - input logic i_rst, + input logic i_clk, + input logic i_rst, /* CPU Interface */ - input logic [ADDR_WIDTH-1:0] i_addr, - input logic i_we, - input logic [7:0] i_data, - output logic [7:0] o_data, + input logic [ADDR_WIDTH-1:0] i_addr, + input logic i_we, + input logic i_sync, + input logic [7:0] i_data, + output logic [7:0] o_data, - input logic i_rdy, - output logic o_rdy, + input logic i_rdy, + output logic o_rdy, /* MMU Interface */ - input logic [ADDR_WIDTH-1:0] i_phys_address, - output page_table_entry_t i_table_entry, - input logic i_mmu_valid, + input logic [ADDR_WIDTH-1:0] i_phys_address, + output page_table_entry_t i_table_entry, + input logic i_mmu_valid, /* Higher level cache interface */ - output logic [ADDR_WIDTH-1:0] o_addr, - 
output logic [1:0] o_cache_cmd, - output logic o_cache_valid, + output logic [ADDR_WIDTH-1:0] o_cache_addr, + output cache_cmd_e o_cache_cmd, + output logic o_cache_valid, - output logic [63:0] o_cache_data, - input logic [31:0] i_cache_data, - input logic i_cache_rdy + output logic [CACHELINE_SIZE*8-1:0] o_cache_data, + input logic [CACHELINE_SIZE*8-1:0] i_cache_data, + input logic i_cache_rdy ); // we have 32 bit addresses, 64 byte cache lines, and 64 total lines. -// Thats 6 bit for offset, 6 bit for index, and 20 bit for cache. +// Thats 6 bit for offset, 6 bit for index, and 20 bit for tag. // cache is virtually indexed, physically tagged @@ -42,49 +43,292 @@ localparam INDEX_W = $clog2(CACHELINE_COUNT); localparam TAG_W = ADDR_WIDTH - INDEX_W - OFFSET_W; localparam META_W = 3; // valid, unique, clean -logic [OFFSET_W-1:0] offset; -logic [INDEX_W-1:0] index; -logic [TAG_W-1:0] tag; +typedef struct { + logic [TAG_W-1:0] tag; + logic valid; + logic shared; + logic clean; +} meta_tag_t; -assign offset = i_addr[OFFSET_W-1:0]; -assign index = i_addr[INDEX_W+OFFSET_W-1:OFFSET_W]; -assign tag = i_addr[INDEX_W+OFFSET_W+TAG_W-1:INDEX_W+OFFSET_W]; +logic [OFFSET_W-1:0] offset, offset_d1; +logic [INDEX_W-1:0] index, index_d1, index_d2; +logic [TAG_W-1:0] tag, tag_d1; // cacheline size is in bytes, not bits // direct mapped cache, read one line so we have data ready if its a hit. 
logic [CACHELINE_SIZE*8-1:0] data_array [CACHELINE_COUNT]; -logic [META_W+TAG_W-1:0] meta_tag_array [CACHELINE_COUNT]; +meta_tag_t meta_tag_array [CACHELINE_COUNT]; -enum logic [1:0] {IDLE, READY, EVICT, READ} state, state_next; +logic [CACHELINE_SIZE*8-1:0] current_data, current_data_next, write_data_prev; +meta_tag_t current_meta_tag, current_meta_tag_next; + +logic [OFFSET_W-1:0] read_offset, read_offset_next; +logic [INDEX_W-1:0] read_index, read_index_next; +logic [ADDR_WIDTH-1:0] read_address, read_address_next; +logic [ADDR_WIDTH-1:0] write_address, write_address_next; + +logic [CACHELINE_SIZE*8-1:0] write_data; +meta_tag_t write_meta_tag; + +logic [INDEX_W-1:0] write_index; +logic data_write_enable; +logic meta_tag_write_enable; + +logic we_d1; +logic latched_we, latched_we_next; + +logic [7:0] latched_data, latched_data_next; +logic [7:0] data_d1; + + +// performance counters +logic [31:0] eviction_count, eviction_count_next; +logic [31:0] cache_miss_count, cache_miss_count_next; + +// reset counter +logic [INDEX_W-1:0] clear_counter, clear_counter_next; + +enum logic [2:0] {RESET, CLEAR, IDLE, READY, EVICT, READ} prev_state, state, state_next; always_ff @(posedge i_clk) begin if (i_rst) begin - state <= IDLE; + state <= RESET; + + current_data <= '0; + + tag_d1 <= '0; + index_d1 <= '0; + offset_d1 <= '0; + + read_address <= '0; + write_address <= '0; + + latched_we <= '0; + + latched_data <= '0; + + eviction_count <= '0; + cache_miss_count <= '0; + + clear_counter <= '0; + end else begin + prev_state <= state; state <= state_next; + + current_data <= current_data_next; + write_data_prev <= write_data; + current_meta_tag <= current_meta_tag_next; + + read_offset <= read_offset_next; + read_index <= read_index_next; + + read_address <= read_address_next; + write_address <= write_address_next; + + if (data_write_enable) begin + data_array[write_index] <= write_data; + end + + if (meta_tag_write_enable) begin + meta_tag_array[write_index] <= write_meta_tag; + 
end + + tag_d1 <= tag; + index_d1 <= index; + index_d2 <= index_d1; + offset_d1 <= offset; + we_d1 <= i_we; + data_d1 <= i_data; + + latched_we <= latched_we_next; + latched_data <= latched_data_next; + + eviction_count <= eviction_count_next; + cache_miss_count <= cache_miss_count_next; + + clear_counter <= clear_counter_next; end end always_comb begin state_next = state; + current_data_next = current_data; + current_meta_tag_next = current_meta_tag; + + read_offset_next = read_offset; + read_index_next = read_index; + read_address_next = read_address; + write_address_next = write_address; + + latched_we_next = latched_we; + latched_data_next = latched_data; + o_rdy = '0; + o_cache_valid = '0; + o_cache_cmd = CACHE_NONE; + o_cache_addr = '0; + o_cache_data = '0; + + // vipt + offset = i_addr[OFFSET_W-1:0]; + index = i_addr[INDEX_W+OFFSET_W-1:OFFSET_W]; + tag = i_phys_address[INDEX_W+OFFSET_W+TAG_W-1:INDEX_W+OFFSET_W]; + + write_index = '0; + write_data = '0; + data_write_enable = '0; + write_meta_tag.tag = '0; + write_meta_tag.valid = '0; + write_meta_tag.shared = '0; + write_meta_tag.clean = '0; + meta_tag_write_enable = '0; + + o_data = '0; + + eviction_count_next = eviction_count; + cache_miss_count_next = cache_miss_count; + + clear_counter_next = clear_counter; + case (state) - IDLE: begin - state_next = READY; + RESET: begin + state_next = CLEAR; end - READY: begin + CLEAR: begin + if (clear_counter == (INDEX_W)'(CACHELINE_COUNT-1)) begin + state_next = IDLE; + end + + write_data = '0; + data_write_enable = '1; + + meta_tag_write_enable = '1; + write_meta_tag.tag = '0; + write_meta_tag.valid = '0; + write_meta_tag.shared = '0; + write_meta_tag.clean = '0; + + write_index = clear_counter; + + clear_counter_next = clear_counter + 1; + end + + IDLE: begin + state_next = READY; + current_data_next = data_array[index]; + current_meta_tag_next = meta_tag_array[index]; o_rdy = '1; end - EVICT: begin + READY: begin + if (!current_meta_tag.valid || 
(current_meta_tag.valid && current_meta_tag.tag != tag_d1 && current_meta_tag.clean)) begin + // current line is not valid, just read + // OR current line is valid, but clean so we don't need to write it back. + state_next = READ; + read_index_next = index_d1; + read_offset_next = offset_d1; + read_address_next = {i_phys_address[31:OFFSET_W], (OFFSET_W)'('0)}; + + latched_we_next = we_d1; + latched_data_next = data_d1; + + cache_miss_count_next = cache_miss_count + 1; + end else if (current_meta_tag.valid && current_meta_tag.tag != tag_d1 && !current_meta_tag.clean) begin + // current line was valid, but the wrong tag. + state_next = EVICT; + + read_index_next = index_d1; + read_offset_next = offset_d1; + read_address_next = {i_phys_address[31:OFFSET_W], (OFFSET_W)'('0)}; + write_address_next = {current_meta_tag.tag, index_d1, (OFFSET_W)'('0)}; + + latched_we_next = we_d1; + latched_data_next = data_d1; + + cache_miss_count_next = cache_miss_count + 1; + eviction_count_next = eviction_count + 1; + end else begin + latched_we_next = i_we; + latched_data_next = i_data; + + // always be loading the next data array + current_data_next = data_array[index]; + current_meta_tag_next = meta_tag_array[index]; + + // We are accessing something we just wrote to + + if (latched_we) begin + write_data = current_data; + + write_data[offset_d1*8 +: 8] = latched_data; + data_write_enable = '1; + + meta_tag_write_enable = '1; + write_meta_tag = current_meta_tag; + write_meta_tag.clean = '0; + + write_index = index_d1; + + if (index == write_index) begin + current_data_next = write_data; + end + end else begin + // we have a possible RAW hazard, but not after READ state + if (prev_state == READY && index_d1 == index_d2) begin + o_data = current_data[offset_d1*8 +: 8]; + end else begin + o_data = current_data[offset_d1*8 +: 8]; + end + end + + o_rdy = '1; + end + end + + EVICT: begin + o_cache_addr = write_address; + o_cache_cmd = CACHE_WRITE; + o_cache_valid = '1; + o_cache_data = 
current_data; + + if (i_cache_rdy) begin + state_next = READ; + end end READ: begin + o_cache_addr = read_address; + o_cache_cmd = CACHE_READ; + o_cache_valid = '1; + write_index = read_index; + write_data = i_cache_data; + write_meta_tag.tag = read_address[31:INDEX_W+OFFSET_W]; + write_meta_tag.valid = '1; + write_meta_tag.shared = '0; + write_meta_tag.clean = ~latched_we; // if we are about to write, then mark dirty + + data_write_enable = i_cache_rdy; + meta_tag_write_enable = i_cache_rdy; + + if (i_cache_rdy) begin + state_next = READY; + current_data_next = i_cache_data; + current_meta_tag_next = write_meta_tag; + + index = write_index; + tag = read_address[31:INDEX_W+OFFSET_W]; + offset = read_offset; + end + end + + default: begin + state_next = READY; end endcase end @@ -101,6 +345,20 @@ end One thing that we also need is an MMU. The TLB can be 1 cycle, then if the TLB says that we are allowed to read from the cache, we can read from the cache. + + How do we handle writes? Since we take 1 cycle to read from the cache, we cannot + immediately write to the cache line in one cycle; we will have to wait a cycle + in order to determine if the cacheline is valid or not. To do this, we will need + to have it be pipelined, so that we store the data temporarily while we read the + meta_tag array, then if it's valid we write to the cache. To avoid RAW hazards, we + also need to store the address and check if we are reading a value we just wrote. + If so, then we return this stored value instead of reading from RAM, since we would + be reading at the same time as we are writing, and that could be undefined. + + Basically, if the index matches the previous access, then we have a hazard and need to + use the stored cacheline instead of the cacheline we read from memory, since that hasn't + been updated yet. We don't need to cache the meta tag since if we just wrote to it, it will + already be dirty anyway. 
*/ diff --git a/src/application_wrapper/cache/application_wrapper_cache_pkg.sv b/src/application_wrapper/cache/application_wrapper_cache_pkg.sv index 5f035f6..f595386 100644 --- a/src/application_wrapper/cache/application_wrapper_cache_pkg.sv +++ b/src/application_wrapper/cache/application_wrapper_cache_pkg.sv @@ -10,4 +10,10 @@ package application_wrapper_cache_pkg; logic write_through; } page_table_entry_t; + typedef enum logic [1:0] { + CACHE_NONE, + CACHE_READ, + CACHE_WRITE + } cache_cmd_e; + endpackage \ No newline at end of file