Fix bug where top 2 bits were getting lost in the modulo

This commit is contained in:
Byron Lathi
2025-10-30 22:34:52 -07:00
parent d6a062baa0
commit 2102cb41f4
4 changed files with 46 additions and 24 deletions

View File

@@ -78,7 +78,7 @@ async def test_sanity(dut):
await tb.cycle_reset() await tb.cycle_reset()
count = 16 count = 1024
for _ in range(count): for _ in range(count):
await tb.write_input(random.randint(1,2**128-1), random.randint(0, 2**130-6)) await tb.write_input(random.randint(1,2**128-1), random.randint(0, 2**130-6))
@@ -93,6 +93,4 @@ async def test_sanity(dut):
tb.log.info(f"{sim_val:x} -> {dut_val:x}") tb.log.info(f"{sim_val:x} -> {dut_val:x}")
fail = True fail = True
# assert not fail assert not fail
await Timer(1, "us")

View File

@@ -88,4 +88,23 @@ async def test_sanity(dut):
tb.log.info(f"{sim_val:x} -> {dut_val:x}") tb.log.info(f"{sim_val:x} -> {dut_val:x}")
fail = True fail = True
assert not fail
@cocotb.test
async def test_directed(dut):
    """Directed regression: push one fixed stimulus and compare a single result.

    Resets the testbench, writes one hard-coded pair of values through
    ``tb.write_input``, then pulls one entry from the testbench's expected
    queue and one from its output queue. The test fails when the two differ;
    the mismatching pair is logged in hex first.

    NOTE(review): the meaning of the two ``write_input`` arguments is defined
    by ``TB`` (not visible here) -- presumably a wide input value and a small
    shift/selector; confirm against the testbench class.
    """
    tb = TB(dut)
    await tb.cycle_reset()

    # Single fixed stimulus pair.
    await tb.write_input(0x14C0D69391E7116E057E7AD833B00B706AA2390C, 4)

    # Fetch the reference value first, then the DUT value (same order as the
    # other tests in this file).
    expected = await tb.expected_queue.get()
    observed = await tb.output_queue.get()

    mismatch = expected != observed
    if mismatch:
        # Log "<expected> -> <observed>" in hex before failing.
        tb.log.info(f"{expected:x} -> {observed:x}")

    assert not mismatch

View File

@@ -1,5 +1,5 @@
module poly1305_friendly_modular_mult #( module poly1305_friendly_modular_mult #(
parameter DATA_WIDTH = 128, parameter DATA_WIDTH = 131,
parameter ACC_WIDTH = 130 parameter ACC_WIDTH = 130
) ( ) (
input logic i_clk, input logic i_clk,
@@ -14,24 +14,25 @@ module poly1305_friendly_modular_mult #(
output logic [ACC_WIDTH-1:0] o_result output logic [ACC_WIDTH-1:0] o_result
); );
localparam INT_ACC_WIDTH = ACC_WIDTH + 3; // $clog2(8)
localparam [129:0] PRIME = (1 << 130) - 5; localparam [129:0] PRIME = (1 << 130) - 5;
logic [2:0] state_counter, state_counter_next; logic [2:0] state_counter, state_counter_next;
logic [2:0] state_counter_p [4]; logic [2:0] state_counter_p [5];
logic [ACC_WIDTH-1:0] accumulator, accumulator_next; // accumulator is outgoing logic [INT_ACC_WIDTH-1:0] accumulator, accumulator_next; // accumulator is outgoing
logic [ACC_WIDTH:0] accumulator_intermediate; logic [INT_ACC_WIDTH+1:0] accumulator_intermediate;
logic [DATA_WIDTH-1:0] data, data_next; logic [DATA_WIDTH-1:0] data, data_next;
logic [ACC_WIDTH-1:0] h, h_next; // h is incoming logic [INT_ACC_WIDTH-1:0] h, h_next; // h is incoming
logic [DATA_WIDTH+26-1:0] mult_product, mult_product_next; logic [DATA_WIDTH+26-1:0] mult_product, mult_product_next;
logic [ACC_WIDTH-1:0] modulo_result; logic [ACC_WIDTH-1:0] modulo_result;
assign o_ready = state_counter >= 3'h4; assign o_ready = state_counter >= 3'h4;
assign o_result = accumulator;
always_ff @(posedge i_clk) begin always_ff @(posedge i_clk) begin
if (i_rst) begin if (i_rst) begin
@@ -46,9 +47,7 @@ always_ff @(posedge i_clk) begin
state_counter_p[0] <= state_counter; state_counter_p[0] <= state_counter;
o_valid <= state_counter_p[3] == 3'h4; for (int i = 1; i < 5; i++) begin
for (int i = 1; i < 4; i++) begin
state_counter_p[i] <= state_counter_p[i-1]; state_counter_p[i] <= state_counter_p[i-1];
end end
end end
@@ -72,25 +71,19 @@ always_comb begin
if (state_counter >= 3'h4 && i_valid) begin if (state_counter >= 3'h4 && i_valid) begin
data_next = i_data; data_next = i_data;
h_next = i_accumulator; h_next = (INT_ACC_WIDTH)'(i_accumulator);
state_counter_next = '0; state_counter_next = '0;
end end
if (state_counter_p[3] == '0) begin if (state_counter_p[3] == '0) begin
accumulator_next = modulo_result; accumulator_next = (INT_ACC_WIDTH)'(modulo_result);
end else begin end else begin
accumulator_intermediate = accumulator + modulo_result; accumulator_next = accumulator + (INT_ACC_WIDTH)'(modulo_result);
if (accumulator_intermediate[130]) begin
// if we wrapped around, we need to add 5
accumulator_next = accumulator_intermediate[129:0] + 5;
end else begin
accumulator_next = accumulator_intermediate[129:0];
end
end end
end end
poly1305_friendly_modulo u_modulo ( poly1305_friendly_modulo u_mult_modulo (
.i_clk (i_clk), .i_clk (i_clk),
.i_rst (i_rst), .i_rst (i_rst),
@@ -102,4 +95,16 @@ poly1305_friendly_modulo u_modulo (
.o_result (modulo_result) .o_result (modulo_result)
); );
poly1305_friendly_modulo u_sum_modulo (
.i_clk (i_clk),
.i_rst (i_rst),
.i_valid (state_counter_p[4] == 3'h4),
.i_val ({127'b0, accumulator}),
.i_shift_amount ('0),
.o_valid (o_valid),
.o_result (o_result)
);
endmodule endmodule

View File

@@ -36,7 +36,7 @@ assign o_valid = valid_sr[2];
always_ff @(posedge i_clk) begin always_ff @(posedge i_clk) begin
valid_sr <= {valid_sr[1:0], i_valid}; valid_sr <= {valid_sr[1:0], i_valid};
high_part_1 <= WIDTH'({3'b0, i_val} >> (130 - (i_shift_amount*SHIFT_SIZE))) * MDIFF; high_part_1 <= WIDE_WIDTH'({3'b0, i_val} >> (130 - (i_shift_amount*SHIFT_SIZE))) * MDIFF;
low_part_1 <= WIDTH'(i_val << (i_shift_amount*SHIFT_SIZE)); low_part_1 <= WIDTH'(i_val << (i_shift_amount*SHIFT_SIZE));
high_part_2 <= (intermediate_val >> WIDTH) * 5; high_part_2 <= (intermediate_val >> WIDTH) * 5;