diff --git a/sim/application_wrapper/cache/application_wrapper_cache_arrays_test.py b/sim/application_wrapper/cache/application_wrapper_cache_arrays_test.py
new file mode 100644
index 0000000..d243c6b
--- /dev/null
+++ b/sim/application_wrapper/cache/application_wrapper_cache_arrays_test.py
@@ -0,0 +1,228 @@
+"""cocotb tests for the ``application_wrapper_cache_arrays`` module.
+
+A Python reference model (``data_arrays`` / ``meta_arrays``) mirrors every
+write driven into the DUT. Each read is compared against that model, and a
+mismatch fails the test with an ``AssertionError``.
+"""
+
+import cocotb
+from cocotb.handle import LogicArray
+
+from cocotb.clock import Clock
+from cocotb.triggers import ReadOnly, NextTimeStep, RisingEdge
+
+import logging
+
+import random
+
+
+logger = logging.getLogger()
+
+logger.setLevel(logging.INFO)
+
+CLK_PERIOD = 5
+
+
+SETS = 64
+WAYS = 4
+
+# Bytes per written line and exclusive upper bound of the random meta word.
+LINE_BYTES = 64
+META_LIMIT = 2**22
+
+# Reference model: data_arrays[way][set] and meta_arrays[way][set] hold the
+# value most recently written to that entry through either port.
+data_arrays = [{} for _ in range(WAYS)]
+meta_arrays = [{} for _ in range(WAYS)]
+
+
+def _random_entry():
+    """Return a random ``(line_bytes, meta_word)`` pair for one write."""
+    return random.randbytes(LINE_BYTES), random.randint(0, META_LIMIT - 1)
+
+
+async def _fill_port(dut, port):
+    """Write a fresh random entry to every (set, way) through *port*.
+
+    Args:
+        dut: cocotb handle of the design under test.
+        port: ``"cpu"`` or ``"snoop"``; selects the ``i_{port}_write_*`` signals.
+    """
+    write_data = getattr(dut, f"i_{port}_write_data")
+    write_meta = getattr(dut, f"i_{port}_write_meta")
+    write_index = getattr(dut, f"i_{port}_write_index")
+    write_valid = getattr(dut, f"i_{port}_write_valid")
+
+    for index in range(SETS):
+        for way in range(WAYS):
+            data, meta = _random_entry()
+
+            # Keep the model in step with what is driven into the DUT.
+            data_arrays[way][index] = data
+            meta_arrays[way][index] = meta
+
+            write_data.value = LogicArray.from_bytes(data, byteorder="little")
+            write_meta.value = meta
+            write_index.value = index
+            write_valid.value = 1 << way  # one write-enable bit per way
+
+            await RisingEdge(dut.i_clk)
+
+    write_valid.value = 0
+
+    # One further clock with the write enable deasserted.
+    await RisingEdge(dut.i_clk)
+
+
+async def _verify_port(dut, port):
+    """Read every set through *port* and compare all ways with the model.
+
+    Args:
+        dut: cocotb handle of the design under test.
+        port: ``"cpu"`` or ``"snoop"``; selects the ``i_{port}_read_*`` and
+            ``o_{port}_read_*`` signals.
+
+    Raises:
+        AssertionError: if the data or meta of any way differs from the model.
+    """
+    read_index = getattr(dut, f"i_{port}_read_index")
+    read_valid = getattr(dut, f"i_{port}_read_valid")
+    read_data = getattr(dut, f"o_{port}_read_data")
+    read_meta = getattr(dut, f"o_{port}_read_meta")
+
+    for index in range(SETS):
+        read_index.value = index
+        read_valid.value = 1
+
+        await RisingEdge(dut.i_clk)
+        # Sample the outputs only once this time step has settled.
+        await ReadOnly()
+
+        meta = [int(m) for m in read_meta.value]
+        data_bytes = [v.to_bytes(byteorder="little") for v in read_data.value]
+
+        expected_data = [data_arrays[way][index] for way in range(WAYS)]
+        expected_meta = [meta_arrays[way][index] for way in range(WAYS)]
+
+        assert data_bytes == expected_data, (
+            f"{port} data mismatch at set {index}"
+        )
+        assert meta == expected_meta, (
+            f"{port} meta mismatch at set {index}: "
+            f"got {meta}, expected {expected_meta}"
+        )
+
+        # Leave the read-only phase before the next iteration drives signals.
+        await NextTimeStep()
+
+    read_valid.value = 0
+    logger.info("%s port: all %d sets verified", port, SETS)
+
+
+@cocotb.test
+async def test_sanity(dut):
+    """Fill and read back the whole array via the CPU port, then the snoop port.
+
+    A mismatch raises ``AssertionError`` and therefore fails the test.
+    """
+    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
+
+    await RisingEdge(dut.i_clk)
+    await RisingEdge(dut.i_clk)
+
+    for port in ("cpu", "snoop"):
+        await _fill_port(dut, port)
+        await _verify_port(dut, port)
+
+
+@cocotb.test
+async def test_random_access(dut):
+    """Drive random simultaneous writes and reads on both ports.
+
+    Every iteration issues one CPU write, one snoop write, one CPU read and
+    one snoop read in the same clock. The model is updated before the
+    comparison, so a read of an entry written in that clock is expected to
+    return the new value.
+
+    Raises:
+        AssertionError: if a read value differs from the model.
+    """
+    cocotb.start_soon(Clock(dut.i_clk, CLK_PERIOD, unit="ns").start())
+
+    ITERS = 1024
+
+    # Reads may target any entry. If the model is not fully populated yet
+    # (for example when this test runs on its own), fill the array first so
+    # the model lookups below cannot raise KeyError.
+    if any(len(way_model) < SETS for way_model in data_arrays):
+        await _fill_port(dut, "cpu")
+
+    for _ in range(ITERS):
+        cpu_write_way = random.randint(0, WAYS-1)
+        cpu_write_set = random.randint(0, SETS-1)
+
+        # NOTE(review): this retries until the snoop write differs from the
+        # CPU write in BOTH way and set, so two writes to the same set (or the
+        # same way) are never driven together. Confirm that is intended.
+        while True:
+            snoop_write_way = random.randint(0, WAYS-1)
+            snoop_write_set = random.randint(0, SETS-1)
+            if snoop_write_way != cpu_write_way and snoop_write_set != cpu_write_set:
+                break
+
+        cpu_write_data, cpu_write_meta = _random_entry()
+        snoop_write_data, snoop_write_meta = _random_entry()
+
+        data_arrays[cpu_write_way][cpu_write_set] = cpu_write_data
+        meta_arrays[cpu_write_way][cpu_write_set] = cpu_write_meta
+
+        data_arrays[snoop_write_way][snoop_write_set] = snoop_write_data
+        meta_arrays[snoop_write_way][snoop_write_set] = snoop_write_meta
+
+        dut.i_cpu_write_data.value = LogicArray.from_bytes(cpu_write_data, byteorder="little")
+        dut.i_cpu_write_meta.value = cpu_write_meta
+        dut.i_cpu_write_index.value = cpu_write_set
+        dut.i_cpu_write_valid.value = 1 << cpu_write_way
+
+        dut.i_snoop_write_data.value = LogicArray.from_bytes(snoop_write_data, byteorder="little")
+        dut.i_snoop_write_meta.value = snoop_write_meta
+        dut.i_snoop_write_index.value = snoop_write_set
+        dut.i_snoop_write_valid.value = 1 << snoop_write_way
+
+        cpu_read_way = random.randint(0, WAYS-1)
+        cpu_read_set = random.randint(0, SETS-1)
+
+        snoop_read_way = random.randint(0, WAYS-1)
+        snoop_read_set = random.randint(0, SETS-1)
+
+        dut.i_cpu_read_index.value = cpu_read_set
+        dut.i_snoop_read_index.value = snoop_read_set
+
+        dut.i_cpu_read_valid.value = 1
+        dut.i_snoop_read_valid.value = 1
+
+        await RisingEdge(dut.i_clk)
+        await ReadOnly()
+
+        cpu_data = dut.o_cpu_read_data.value[cpu_read_way].to_bytes(byteorder="little")
+        cpu_meta = int(dut.o_cpu_read_meta.value[cpu_read_way])
+
+        snoop_data = dut.o_snoop_read_data.value[snoop_read_way].to_bytes(byteorder="little")
+        snoop_meta = int(dut.o_snoop_read_meta.value[snoop_read_way])
+
+        cpu_where = f"set {cpu_read_set}, way {cpu_read_way}"
+        snoop_where = f"set {snoop_read_set}, way {snoop_read_way}"
+
+        assert cpu_data == data_arrays[cpu_read_way][cpu_read_set], (
+            f"CPU data mismatch at {cpu_where}"
+        )
+        assert cpu_meta == meta_arrays[cpu_read_way][cpu_read_set], (
+            f"CPU meta mismatch at {cpu_where}"
+        )
+        assert snoop_data == data_arrays[snoop_read_way][snoop_read_set], (
+            f"snoop data mismatch at {snoop_where}"
+        )
+        assert snoop_meta == meta_arrays[snoop_read_way][snoop_read_set], (
+            f"snoop meta mismatch at {snoop_where}"
+        )
+
+        await NextTimeStep()
\ No newline at end of file
diff --git a/sim/application_wrapper/cache/cache.yaml b/sim/application_wrapper/cache/cache.yaml
index d6bd069..e4b6b17 100644
---
a/sim/application_wrapper/cache/cache.yaml
+++ b/sim/application_wrapper/cache/cache.yaml
@@ -1,7 +1,7 @@
 tests:
-  - name: "application_wrapper_cache_l1_test"
-    toplevel: "application_wrapper_cache_l1"
+  - name: "application_wrapper_cache_arrays_test"
+    toplevel: "application_wrapper_cache_arrays"
     modules:
-      - "application_wrapper_cache_l1_test"
+      - "application_wrapper_cache_arrays_test"
     sources: "sources.list"
     waves: True
\ No newline at end of file
diff --git a/src/application_wrapper/cache/application_wrapper_cache_arrays.sv b/src/application_wrapper/cache/application_wrapper_cache_arrays.sv
new file mode 100644
index 0000000..87580ce
--- /dev/null
+++ b/src/application_wrapper/cache/application_wrapper_cache_arrays.sv
@@ -0,0 +1,72 @@
+module application_wrapper_cache_arrays #(
+    parameter NUM_WAYS = 4,
+    parameter NUM_SETS = 64,
+    // Derived widths below are localparams and cannot be overridden.
+    localparam DATA_W = 64*8,
+    localparam OFFSET_W = 6,
+    localparam INDEX_W = $clog2(NUM_SETS),
+    localparam TAG_W = 32 - INDEX_W - OFFSET_W,
+    // NOTE(review): the 2 extra meta bits look like per-line state; confirm.
+    localparam META_W = TAG_W + 2
+) (
+    input logic i_clk,
+    // CPU read port: one set index, qualified by i_cpu_read_valid.
+    input logic [INDEX_W-1:0] i_cpu_read_index,
+    input logic i_cpu_read_valid,
+    // CPU read result: one data/meta element per way of the indexed set.
+    output logic [DATA_W-1:0] o_cpu_read_data [NUM_WAYS],
+    output logic [META_W-1:0] o_cpu_read_meta [NUM_WAYS],
+    // CPU write port: i_cpu_write_valid carries one write enable per way.
+    input logic [INDEX_W-1:0] i_cpu_write_index,
+    input logic [NUM_WAYS-1:0] i_cpu_write_valid,
+    // Written to every way whose bit is set in i_cpu_write_valid.
+    input logic [DATA_W-1:0] i_cpu_write_data,
+    input logic [META_W-1:0] i_cpu_write_meta,
+    // Snoop read port: one set index, qualified by i_snoop_read_valid.
+    input logic [INDEX_W-1:0] i_snoop_read_index,
+    input logic i_snoop_read_valid,
+    // Snoop read result: one data/meta element per way of the indexed set.
+    output logic [DATA_W-1:0] o_snoop_read_data [NUM_WAYS],
+    output logic [META_W-1:0] o_snoop_read_meta [NUM_WAYS],
+    // Snoop write port: i_snoop_write_valid carries one write enable per way.
+    input logic [INDEX_W-1:0] i_snoop_write_index,
+    input logic [NUM_WAYS-1:0] i_snoop_write_valid,
+    // Written to every way whose bit is set in i_snoop_write_valid.
+    input logic [DATA_W-1:0] i_snoop_write_data,
+    input logic [META_W-1:0] i_snoop_write_meta
+);
+
+// Memory arrays, indexed [set][way]. This module has no reset input.
+// In order to make these WRITE_FIRST, the write data is stored with a
+// blocking assignment before the read outputs are assigned, so a read of
+// an entry written in the same clock returns the newly written value.
+
+logic [DATA_W-1:0] data_arrays [NUM_SETS][NUM_WAYS];
+logic [META_W-1:0] meta_arrays [NUM_SETS][NUM_WAYS];
+
+// If both ports write the same set and way in one clock, the snoop write is assigned last and wins.
+always @(posedge i_clk) begin
+    for (int i = 0; i < NUM_WAYS; i++) begin
+        if (i_cpu_write_valid[i]) begin
+            data_arrays[i_cpu_write_index][i] = i_cpu_write_data;
+            meta_arrays[i_cpu_write_index][i] = i_cpu_write_meta;
+        end
+
+        if (i_snoop_write_valid[i]) begin
+            data_arrays[i_snoop_write_index][i] = i_snoop_write_data;
+            meta_arrays[i_snoop_write_index][i] = i_snoop_write_meta;
+        end
+    end
+    // Read outputs are only assigned while the matching read_valid is high; otherwise they keep their value.
+    if (i_cpu_read_valid) begin
+        o_cpu_read_data = data_arrays[i_cpu_read_index];
+        o_cpu_read_meta = meta_arrays[i_cpu_read_index];
+    end
+
+    if (i_snoop_read_valid) begin
+        o_snoop_read_data = data_arrays[i_snoop_read_index];
+        o_snoop_read_meta = meta_arrays[i_snoop_read_index];
+    end
+end
+
+endmodule
\ No newline at end of file
diff --git a/src/application_wrapper/cache/application_wrapper_cache_l1.sv b/src/application_wrapper/cache/application_wrapper_cache_l1.sv
deleted file mode 100644
index 4ff9e03..0000000
--- a/src/application_wrapper/cache/application_wrapper_cache_l1.sv
+++ /dev/null
@@ -1,423 +0,0 @@
-import application_wrapper_cache_pkg::*;
-
-module application_wrapper_cache_l1 #(
-    parameter CACHELINE_SIZE = 64,
-    parameter CACHELINE_COUNT = 64,
-    localparam ADDR_WIDTH = 32
-)(
-    input logic i_clk,
-    input logic i_rst,
-
-    /* CPU Interface */
-    input logic [ADDR_WIDTH-1:0] i_addr,
-    input logic i_we,
-    input logic i_sync,
-    input logic [7:0] i_data,
-    output logic [7:0] o_data,
-
-    input logic i_rdy,
-    output logic o_rdy,
-
-    /* MMU Interface */
-    input logic [ADDR_WIDTH-1:0] i_phys_address,
-    output page_table_entry_t i_table_entry,
-    input logic i_mmu_valid,
-
-    /* Higher level cache interface */
-    output logic [ADDR_WIDTH-1:0] o_cache_addr,
-    output cache_cmd_e o_cache_cmd,
-    output logic o_cache_valid,
-
-    output
logic [CACHELINE_SIZE*8-1:0] o_cache_data, - input logic [CACHELINE_SIZE*8-1:0] i_cache_data, - input logic i_cache_rdy -); - -// we have 32 bit addresses, 64 byte cache lines, and 64 total lines. -// Thats 6 bit for offset, 6 bit for index, and 20 bit for tag. - -// cache is virtually indexed, physically tagged - -localparam OFFSET_W = $clog2(CACHELINE_SIZE); -localparam INDEX_W = $clog2(CACHELINE_COUNT); -localparam TAG_W = ADDR_WIDTH - INDEX_W - OFFSET_W; -localparam META_W = 3; // valid, unique, clean - -typedef struct { - logic [TAG_W-1:0] tag; - logic valid; - logic shared; - logic clean; -} meta_tag_t; - -logic [OFFSET_W-1:0] offset, offset_d1; -logic [INDEX_W-1:0] index, index_d1, index_d2; -logic [TAG_W-1:0] tag, tag_d1; - -// cacheline size is in bytes, not bits -// direct mapped cache, read one line so we have data ready if its a hit. -logic [CACHELINE_SIZE*8-1:0] data_array [CACHELINE_COUNT]; -meta_tag_t meta_tag_array [CACHELINE_COUNT]; - -logic [CACHELINE_SIZE*8-1:0] current_data, current_data_next, write_data_prev; -meta_tag_t current_meta_tag, current_meta_tag_next; - -logic [OFFSET_W-1:0] read_offset, read_offset_next; -logic [INDEX_W-1:0] read_index, read_index_next; -logic [ADDR_WIDTH-1:0] read_address, read_address_next; -logic [ADDR_WIDTH-1:0] write_address, write_address_next; - -logic [CACHELINE_SIZE*8-1:0] write_data; -meta_tag_t write_meta_tag; - -logic [INDEX_W-1:0] write_index; -logic data_write_enable; -logic meta_tag_write_enable; - -logic we_d1; -logic latched_we, latched_we_next; - -logic [7:0] latched_data, latched_data_next; -logic [7:0] data_d1; - - -// performance counters -logic [31:0] eviction_count, eviction_count_next; -logic [31:0] cache_miss_count, cache_miss_count_next; - -// reset counter -logic [INDEX_W-1:0] clear_counter, clear_counter_next; - -enum logic [2:0] {RESET, CLEAR, IDLE, READY, EVICT, READ, WAIT_CLEAN_UNIQUE} prev_state, state, state_next; - -always_ff @(posedge i_clk) begin - if (i_rst) begin - state <= 
RESET; - - current_data <= '0; - - tag_d1 <= '0; - index_d1 <= '0; - offset_d1 <= '0; - - read_address <= '0; - write_address <= '0; - - latched_we <= '0; - - latched_data <= '0; - - eviction_count <= '0; - cache_miss_count <= '0; - - clear_counter <= '0; - - end else begin - prev_state <= state; - state <= state_next; - - current_data <= current_data_next; - write_data_prev <= write_data; - current_meta_tag <= current_meta_tag_next; - - read_offset <= read_offset_next; - read_index <= read_index_next; - - read_address <= read_address_next; - write_address <= write_address_next; - - if (data_write_enable) begin - data_array[write_index] <= write_data; - end - - if (meta_tag_write_enable) begin - meta_tag_array[write_index] <= write_meta_tag; - end - - tag_d1 <= tag; - index_d1 <= index; - index_d2 <= index_d1; - offset_d1 <= offset; - we_d1 <= i_we; - data_d1 <= i_data; - - latched_we <= latched_we_next; - latched_data <= latched_data_next; - - eviction_count <= eviction_count_next; - cache_miss_count <= cache_miss_count_next; - - clear_counter <= clear_counter_next; - end -end - -always_comb begin - state_next = state; - - current_data_next = current_data; - current_meta_tag_next = current_meta_tag; - - read_offset_next = read_offset; - read_index_next = read_index; - read_address_next = read_address; - write_address_next = write_address; - - latched_we_next = latched_we; - latched_data_next = latched_data; - - o_rdy = '0; - - o_cache_valid = '0; - o_cache_cmd = CACHE_NONE; - o_cache_addr = '0; - o_cache_data = '0; - - // vipt - offset = i_addr[OFFSET_W-1:0]; - index = i_addr[INDEX_W+OFFSET_W-1:OFFSET_W]; - tag = i_phys_address[INDEX_W+OFFSET_W+TAG_W-1:INDEX_W+OFFSET_W]; - - write_index = '0; - write_data = '0; - data_write_enable = '0; - write_meta_tag.tag = '0; - write_meta_tag.valid = '0; - write_meta_tag.shared = '0; - write_meta_tag.clean = '0; - meta_tag_write_enable = '0; - - o_data = '0; - - eviction_count_next = eviction_count; - cache_miss_count_next = 
cache_miss_count; - - clear_counter_next = clear_counter; - - case (state) - RESET: begin - state_next = CLEAR; - end - - CLEAR: begin - if (clear_counter == (INDEX_W)'(CACHELINE_COUNT-1)) begin - state_next = IDLE; - end - - write_data = '0; - data_write_enable = '1; - - meta_tag_write_enable = '1; - write_meta_tag.tag = '0; - write_meta_tag.valid = '0; - write_meta_tag.shared = '0; - write_meta_tag.clean = '0; - - write_index = clear_counter; - - clear_counter_next = clear_counter + 1; - end - - IDLE: begin - state_next = READY; - current_data_next = data_array[index]; - current_meta_tag_next = meta_tag_array[index]; - o_rdy = '1; - end - - READY: begin - if (!current_meta_tag.valid || (current_meta_tag.valid && current_meta_tag.tag != tag_d1 && current_meta_tag.clean)) begin - // current line is not valid, just read - // OR current line is valid, but clean so we don't need to write it back. - state_next = READ; - - read_index_next = index_d1; - read_offset_next = offset_d1; - read_address_next = {i_phys_address[31:OFFSET_W], (OFFSET_W)'('0)}; - - latched_we_next = we_d1; - latched_data_next = data_d1; - - cache_miss_count_next = cache_miss_count + 1; - end else if (current_meta_tag.valid && current_meta_tag.tag != tag_d1 && !current_meta_tag.clean) begin - // current line was valid, but the wrong tag. 
- state_next = EVICT; - - read_index_next = index_d1; - read_offset_next = offset_d1; - read_address_next = {i_phys_address[31:OFFSET_W], (OFFSET_W)'('0)}; - write_address_next = {current_meta_tag.tag, index_d1, (OFFSET_W)'('0)}; - - latched_we_next = we_d1; - latched_data_next = data_d1; - - cache_miss_count_next = cache_miss_count + 1; - eviction_count_next = eviction_count + 1; - end else begin - latched_we_next = i_we; - latched_data_next = i_data; - - // always be loading the next data array - current_data_next = data_array[index]; - current_meta_tag_next = meta_tag_array[index]; - - // We are accessing something we just wrote to - - if (latched_we) begin - // if we are writing to a shared cacheline, we must make it unique first! - if (current_data.shared) begin - o_rdy = '0; - - o_cache_cmd = CACHE_CLEAN_UNIQUE; - o_cache_valid = '1; - - latched_we_next = we_d1; - latched_data_next = data_d1; - - read_index_next = index_d1; - read_offset_next = offset_d1; - - - state_next = WAIT_CLEAN_UNIQUE; - end - - write_data = current_data; - - write_data[offset_d1*8 +: 8] = latched_data; - data_write_enable = '1; - - meta_tag_write_enable = '1; - write_meta_tag = current_meta_tag; - write_meta_tag.clean = '0; - - write_index = index_d1; - - if (index == write_index) begin - current_data_next = write_data; - end - end else begin - // we have a possible RAW hazard, but not after READ state - if (prev_state == READY && index_d1 == index_d2) begin - o_data = current_data[offset_d1*8 +: 8]; - end else begin - o_data = current_data[offset_d1*8 +: 8]; - end - o_rdy = '1; - end - - end - end - - EVICT: begin - o_cache_addr = write_address; - o_cache_cmd = CACHE_WRITE; - o_cache_valid = '1; - o_cache_data = current_data; - - if (i_cache_rdy) begin - state_next = READ; - end - end - - READ: begin - o_cache_addr = read_address; - if (latched_we) begin - o_cache_cmd = CACHE_READ_UNIQUE; - end else begin - o_cache_cmd = CACHE_READ_SHARED; - end - o_cache_valid = '1; - - write_index 
= read_index; - write_data = i_cache_data; - write_meta_tag.tag = read_address[31:INDEX_W+OFFSET_W]; - write_meta_tag.valid = '1; - write_meta_tag.shared = ~latched_we; // if we are about to write, then we requested unique - write_meta_tag.clean = ~latched_we; // if we are about to write, then mark dirty - - data_write_enable = i_cache_rdy; - meta_tag_write_enable = i_cache_rdy; - - if (i_cache_rdy) begin - state_next = READY; - current_data_next = i_cache_data; - current_meta_tag_next = write_meta_tag; - - index = write_index; - tag = read_address[31:INDEX_W+OFFSET_W]; - offset = read_offset; - end - end - - WAIT_CLEAN_UNIQUE: begin - // Write data after making it unique - if (i_cache_rdy) begin - write_data[offset_d1*8 +: 8] = latched_data; - data_write_enable = i_cache_rdy; - - meta_tag_write_enable = i_cache_rdy; - write_meta_tag.tag = read_address[31:INDEX_W+OFFSET_W]; - write_meta_tag.valid = '1; - write_meta_tag.shared = '0; - write_meta_tag.clean = '0; - - write_index = index_d1; - - if (index == write_index) begin - current_data_next = write_data; - end - - state_next = READY; - end - end - - default: begin - state_next = READY; - end - endcase -end - -/* - - In the ready state, we read from the data array and if the line is valid - and the tag matches with the address, we present the data to the cpu. - Otherwise, we lower o_rdy and send the request to the higher level cache. - - If what we read was valid but the tag didn't match, then we need to evict it. - If the line was not valid, then we don't need to evict it and can just request - the new data. - - One thing that we also need is an MMU. The TLB can be 1 cycle, then if the TLB - says that we are allowed to read from the cache, we can read from the cache. - - how do we handle writes? Since we take 1 cycle to read from the cache, we cannot - immediately write to the cache line in one cycle, we will have to wait a cycle - in order to determine if the cacheline is valid or not. 
To do this, we will need - to have it be pipelined, so that we store the data temporarily while we read the - meta_tag array, then if its valid we write to the cache. To avoid RAW hazards, we - also need to store the address and check if we are reading a value we just wrote. - If so, then we return this stored value instead of reading from ram, since we would - be reading at the same time as we are writing, and that could be undefined. - - basically if the index matches the previous access, then we have a hazard and need to - use the stored cacheline instead of the cacheline we read from memory, since that hasn't - been updated yet. We don't need to cache metatag since if we just wrote to it, it will - already be dirty anyway. -*/ - -/* - - In order for this to be a coherent requester, we need to adjust the protocol slightly - - * CPU Requests data to READ: Send ReadShared request - * CPU Requests data to WRITE: Send ReadUnique request - * CPU Writes to data already in cache: CleanUnique - * Clean cacheline overwritten: Send Evict request - * Dirty cacheline overwritten: Send WriteBackFull request - - The cache <-> CHI interface is handled by a separate module. 
-*/
-
-
-
-
-endmodule
\ No newline at end of file
diff --git a/src/application_wrapper/cache/application_wrapper_cache_pkg.sv b/src/application_wrapper/cache/application_wrapper_cache_pkg.sv
index 6b1f480..ffb330d 100644
--- a/src/application_wrapper/cache/application_wrapper_cache_pkg.sv
+++ b/src/application_wrapper/cache/application_wrapper_cache_pkg.sv
@@ -18,4 +18,11 @@ package application_wrapper_cache_pkg;
         CACHE_CLEAN_UNIQUE
     } cache_cmd_e;
 
+    typedef enum logic [1:0] {  // no explicit values: members are numbered 0..3 in declaration order
+        MODIFIED,   // 2'd0
+        EXCLUSIVE,  // 2'd1
+        SHARED,     // 2'd2
+        INVALID     // 2'd3
+    } mesi_e;  // NOTE(review): an all-zero value decodes as MODIFIED, not INVALID; confirm this is intended
+
 endpackage
\ No newline at end of file
diff --git a/src/application_wrapper/sources.list b/src/application_wrapper/sources.list
index 638ba6b..d54a1e5 100644
--- a/src/application_wrapper/sources.list
+++ b/src/application_wrapper/sources.list
@@ -1,5 +1,5 @@
 cache/application_wrapper_cache_pkg.sv
-cache/application_wrapper_cache_l1.sv
+cache/application_wrapper_cache_arrays.sv
 cache/application_wrapper_mmu.sv
 cache/application_wrapper_cache_top.sv