From 003527ee0d77145404ccb0b7fa2f66fb09bc1269 Mon Sep 17 00:00:00 2001 From: Byron Lathi Date: Sun, 26 Oct 2025 16:09:16 -0700 Subject: [PATCH] Do poly1305 with absolutely no modulo operators --- ChaCha20_Poly1305_64/sim/do_poly_1305.py | 14 +++++++++++--- ChaCha20_Poly1305_64/sim/modulo_theory.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/ChaCha20_Poly1305_64/sim/do_poly_1305.py b/ChaCha20_Poly1305_64/sim/do_poly_1305.py index dc0377e..ee5d1f0 100644 --- a/ChaCha20_Poly1305_64/sim/do_poly_1305.py +++ b/ChaCha20_Poly1305_64/sim/do_poly_1305.py @@ -1,5 +1,7 @@ from typing import List +from modulo_theory import friendly_modular_mult, friendly_modulo + def mask_r(r: int) -> int: r_bytes = r.to_bytes(16, "little") @@ -40,7 +42,12 @@ def parallel_poly1305(message: bytes, r: int, s: int, lanes: int): r = mask_r(r) p = 2**130-5 - r_powers = [r**i % p for i in range(lanes+1)] + r_powers = [1, r] + + for l_pow_log2 in range(3): + l_pow = 2**l_pow_log2 + for r_pow in range(1,l_pow+1): + r_powers.append(friendly_modular_mult(r_powers[l_pow], r_powers[r_pow])) acc = [0]*lanes @@ -53,12 +60,13 @@ def parallel_poly1305(message: bytes, r: int, s: int, lanes: int): idx = i*lanes + j power = min(lanes, len(blocks) - idx) + # There is a division here but we can get this value somehow else byte_length = (lane.bit_length() + 7) // 8 lane += 1 << (8*byte_length) - acc[j] = ((acc[j] + lane)*(r_powers[power])) % p + acc[j] = friendly_modular_mult(acc[j] + lane, r_powers[power]) - combined_acc = sum(acc) % p + combined_acc = friendly_modulo(sum(acc), 0) combined_acc += s return combined_acc & (2**128-1) diff --git a/ChaCha20_Poly1305_64/sim/modulo_theory.py b/ChaCha20_Poly1305_64/sim/modulo_theory.py index b303246..84cf82c 100644 --- a/ChaCha20_Poly1305_64/sim/modulo_theory.py +++ b/ChaCha20_Poly1305_64/sim/modulo_theory.py @@ -50,6 +50,18 @@ def modulo_theory_full(loops: int): print(f"{mod_sum=}") print(f"{mod_conventional=}") +def 
def friendly_modular_mult(value_a: int, value_b: int) -> int:
    """Multiply ``value_a`` by ``value_b`` limb-by-limb via ``friendly_modulo``.

    ``value_a`` is split into five limbs at 26-bit boundaries. Every limb is
    multiplied by the whole of ``value_b``, and the partial product of limb
    ``i`` is passed to ``friendly_modulo`` with a shift of ``26 * i`` so the
    limb's weight is applied there. The five results are summed and passed
    through ``friendly_modulo`` one more time with a shift of 0.

    The four low limbs are exactly 26 bits wide. The top limb keeps every bit
    from position 104 upwards, so a ``value_a`` that is wider than 130 bits
    contributes all of its bits instead of having the excess silently
    discarded. For ``value_a < 2**130`` the limbs are the same as with a
    26-bit mask on every limb.

    NOTE(review): ``friendly_modulo`` is assumed to fold its input modulo
    ``2**130 - 5`` for inputs of this width -- confirm against its definition.

    Args:
        value_a: Multiplicand; split into 26-bit limbs.
        value_b: Multiplier; applied whole to each limb.

    Returns:
        Whatever the final ``friendly_modulo(sum_of_partials, 0)`` call
        returns.
    """
    limb_bits = 26
    limb_count = 5
    limb_mask = (1 << limb_bits) - 1
    top_shift = limb_bits * (limb_count - 1)

    # Low limbs are masked to 26 bits each.
    a_partials = [
        (value_a >> (limb_bits * i)) & limb_mask
        for i in range(limb_count - 1)
    ]
    # The top limb is deliberately left unmasked: masking it would drop any
    # bits of value_a at position 130 or above and corrupt the product.
    a_partials.append(value_a >> top_shift)

    # Limb i carries weight 2**(26*i); friendly_modulo applies that shift.
    mods = [
        friendly_modulo(a_partial * value_b, limb_bits * i)
        for i, a_partial in enumerate(a_partials)
    ]

    return friendly_modulo(sum(mods), 0)