big: Add `internal_int_exponent_mod`.

author: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-08-31 16:43:07 +0200
committer: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-09-01 19:13:47 +0200
commit: 65a15e9c060d74bc3a7977c8c3329ec43dc810b2 (patch)
tree: 012c4e36e098b0e964e4817c16e70eb5785f9005 /core
parent: c3a70ac277494b70e86578f1ce31923a0ca8d2c8 (diff)
13 files changed, 294 insertions, 46 deletions
diff --git a/core/math/big/api.odin b/core/math/big/api.odin
index 1f2eab8d7..e2761b425 100644
--- a/core/math/big/api.odin
+++ b/core/math/big/api.odin
@@ -1,14 +1,15 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
-	Made available under Odin's BSD-2 license.
+	Made available under Odin's BSD-3 license.
 
 	An arbitrary precision mathematics implementation in Odin.
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 
 	This file collects public proc maps and their aliases.
+*/
+package math_big
+/*
 
 	=== === === === === === === === === === === === === === === === === === === === === === === ===
 	                                    Basic arithmetic.
diff --git a/core/math/big/common.odin b/core/math/big/common.odin
index ce1f7d77f..4171d25f3 100644
--- a/core/math/big/common.odin
+++ b/core/math/big/common.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -8,6 +6,7 @@ package math_big
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 */
+package math_big
 
 import "core:intrinsics"
 
@@ -57,10 +56,10 @@ when #config(MATH_BIG_EXE, true) {
 	debugged where necessary.
 */
 
-_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF,  80);
-_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(SQR_KARATSUBA_CUTOFF, 120);
-_DEFAULT_MUL_TOOM_CUTOFF      :: #config(MUL_TOOM_CUTOFF,      350);
-_DEFAULT_SQR_TOOM_CUTOFF      :: #config(SQR_TOOM_CUTOFF,      400);
+_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MATH_BIG_MUL_KARATSUBA_CUTOFF,  80);
+_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(MATH_BIG_SQR_KARATSUBA_CUTOFF, 120);
+_DEFAULT_MUL_TOOM_CUTOFF      :: #config(MATH_BIG_MUL_TOOM_CUTOFF,      350);
+_DEFAULT_SQR_TOOM_CUTOFF      :: #config(MATH_BIG_SQR_TOOM_CUTOFF,      400);
 
 
 MAX_ITERATIONS_ROOT_N := 500;
@@ -85,15 +84,22 @@ FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100;
 
 	2) Optimizations thanks to precomputed masks wouldn't work.
 */
-MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false);
-MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false);
+MATH_BIG_FORCE_64_BIT   :: #config(MATH_BIG_FORCE_64_BIT, false);
+MATH_BIG_FORCE_32_BIT   :: #config(MATH_BIG_FORCE_32_BIT, false);
 when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); };
 
-_LOW_MEMORY           :: #config(BIGINT_SMALL_MEMORY, false);
+/*
+	Set `MATH_BIG_SMALL_MEMORY` to trade processing speed for a smaller memory footprint.
+*/
+_LOW_MEMORY             :: #config(MATH_BIG_SMALL_MEMORY, false);
 when _LOW_MEMORY {
-	_DEFAULT_DIGIT_COUNT :: 8;
+	_DEFAULT_DIGIT_COUNT ::   8;
+	_TAB_SIZE            ::  32;
+	_MAX_WIN_SIZE        ::   5;
 } else {
-	_DEFAULT_DIGIT_COUNT :: 32;
+	_DEFAULT_DIGIT_COUNT ::  32;
+	_TAB_SIZE            :: 256;
+	_MAX_WIN_SIZE        ::   0;
 }
 
 /*
diff --git a/core/math/big/example.odin b/core/math/big/example.odin
index e2ed30680..18b6062d9 100644
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -1,6 +1,4 @@
 //+ignore
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -9,6 +7,8 @@ package math_big
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 */
+package math_big
+
 
 import "core:fmt"
 import "core:mem"
@@ -18,11 +18,14 @@ print_configation :: proc() {
 `
 Configuration:
 	_DIGIT_BITS                           %v
+	_SMALL_MEMORY                         %v
 	_MIN_DIGIT_COUNT                      %v
 	_MAX_DIGIT_COUNT                      %v
 	_DEFAULT_DIGIT_COUNT                  %v
 	_MAX_COMBA                            %v
 	_WARRAY                               %v
+	_TAB_SIZE                             %v
+	_MAX_WIN_SIZE                         %v
 Runtime tunable:
 	MUL_KARATSUBA_CUTOFF                  %v
 	SQR_KARATSUBA_CUTOFF                  %v
@@ -34,11 +37,14 @@ Runtime tunable:
 	FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v
 
 `, _DIGIT_BITS,
+_LOW_MEMORY,
 _MIN_DIGIT_COUNT,
 _MAX_DIGIT_COUNT,
 _DEFAULT_DIGIT_COUNT,
 _MAX_COMBA,
 _WARRAY,
+_TAB_SIZE,
+_MAX_WIN_SIZE,
 MUL_KARATSUBA_CUTOFF,
 SQR_KARATSUBA_CUTOFF,
 MUL_TOOM_CUTOFF,
@@ -203,8 +209,18 @@ int_to_byte_little :: proc(v: ^Int) {
 }
 
 demo :: proc() {
-	// a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
-	// defer destroy(a, b, c, d, e, f);
+	a, b, c, d, e, f, res := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
+	defer destroy(a, b, c, d, e, f, res);
+
+	set(a, 42); // Base `G`.
+	set(b, 6); // Exponent `X`.
+	set(c, 5); // Modulus `P`.
+
+	if err := internal_int_exponent_mod(res, a, b, c, 0); err != nil { // `redmode` 0 selects `internal_int_reduce`.
+		fmt.printf("Error: %v\n", err);
+	}
+
+	print("res: ", res); // Mathematically, 42**6 mod 5 == 4.
 }
 
 main :: proc() {
diff --git a/core/math/big/helpers.odin b/core/math/big/helpers.odin
index 8ce1b2811..ff654172c 100644
--- a/core/math/big/helpers.odin
+++ b/core/math/big/helpers.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -8,6 +6,7 @@ package math_big
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 */
+package math_big
 
 import "core:intrinsics"
 import rnd "core:math/rand"
diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin
index 9422067ae..789163af2 100644
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -1,6 +1,4 @@
 //+ignore
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -31,6 +29,7 @@ package math_big
 
 	TODO: Handle +/- Infinity and NaN.
 */
+package math_big
 
 import "core:mem"
 import "core:intrinsics"
diff --git a/core/math/big/logical.odin b/core/math/big/logical.odin
index 64f3b0898..1e7f8e1b1 100644
--- a/core/math/big/logical.odin
+++ b/core/math/big/logical.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -10,6 +8,7 @@ package math_big
 
 	This file contains logical operations like `and`, `or` and `xor`.
 */
+package math_big
 
 /*
 	The `and`, `or` and `xor` binops differ in two lines only.
diff --git a/core/math/big/prime.odin b/core/math/big/prime.odin
index 6a3a098a4..1947ac634 100644
--- a/core/math/big/prime.odin
+++ b/core/math/big/prime.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -10,6 +8,7 @@ package math_big
 
 	This file contains prime finding operations.
 */
+package math_big
 
 /*
 	Determines if an Integer is divisible by one of the _PRIME_TABLE primes.
@@ -223,7 +222,7 @@ internal_int_reduce :: proc(x, m, mu: ^Int, allocator := context.allocator) -> (
 	/*
 		q = x
 	*/
-	copy(q, x)                                                       or_return;
+	internal_copy(q, x)                                              or_return;
 
 	/*
 		q1 = x / b**(k-1)
@@ -234,7 +233,7 @@ internal_int_reduce :: proc(x, m, mu: ^Int, allocator := context.allocator) -> (
 		According to HAC this optimization is ok.
 	*/
 	if DIGIT(um) > DIGIT(1) << (_DIGIT_BITS - 1) {
-		mul(q, q, mu)                                                or_return;
+		internal_mul(q, q, mu)                                       or_return;
 	} else {
 		_private_int_mul_high(q, q, mu, um)                          or_return;
 	}
@@ -435,32 +434,257 @@ internal_int_reduce_2k_setup :: proc(a: ^Int, allocator := context.allocator) ->
 
 /*
 	Determines the setup value.
-	Assumes `a` is not `nil`.
+	Assumes `mu` and `P` are not `nil`.
+
+	mu := (1 << count_bits(P)) - P;
 */
-internal_int_reduce_2k_setup_l :: proc(a, d: ^Int, allocator := context.allocator) -> (err: Error) {
+internal_int_reduce_2k_setup_l :: proc(mu, P: ^Int, allocator := context.allocator) -> (err: Error) {
 	context.allocator = allocator;
 
 	tmp := &Int{};
 	defer internal_destroy(tmp);
 	internal_zero(tmp)                                               or_return;
 
-	internal_int_power_of_two(tmp, internal_count_bits(a))           or_return;
-	internal_sub(d, tmp, a)                                          or_return;
+	internal_int_power_of_two(tmp, internal_count_bits(P))           or_return;
+	internal_sub(mu, tmp, P)                                         or_return;
 
 	return nil;
 }
 
 /*
 	Pre-calculate the value required for Barrett reduction.
-	For a given modulus "b" it calulates the value required in "a"
+	For a given modulus "P" it calculates the value required in "mu".
+	Assumes `mu` and `P` are not `nil`.
 */
-internal_int_reduce_setup :: proc(a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+internal_int_reduce_setup :: proc(mu, P: ^Int, allocator := context.allocator) -> (err: Error) {
 	context.allocator = allocator;
 
-	internal_int_power_of_two(a, b.used * 2 * _DIGIT_BITS)           or_return;
-	return internal_int_div(a, a, b);
+	internal_int_power_of_two(mu, P.used * 2 * _DIGIT_BITS)           or_return;
+	return internal_int_div(mu, mu, P);
 }
 
+/*
+	Computes res == G**X mod P, one window of exponent bits at a time. `redmode == 0` reduces with `internal_int_reduce`, any other value with `internal_int_reduce_2k_l`.
+	Assumes `res`, `G`, `X` and `P` to not be `nil` and for `G`, `X` and `P` to have been initialized.
+*/
+internal_int_exponent_mod :: proc(res, G, X, P: ^Int, redmode: int, allocator := context.allocator) -> (err: Error) {
+	context.allocator = allocator;
+
+	M := [_TAB_SIZE]Int{};
+	winsize: uint;
+
+	redux: #type proc(x, m, mu: ^Int, allocator := context.allocator) -> (err: Error); // Reduction proc, chosen from `redmode` below.
+
+	defer { // Destroys the same table slots that get initialized below: M[1] and the upper half.
+		internal_destroy(&M[1]);
+		for x := 1 << (winsize - 1); x < (1 << winsize); x += 1 {
+			internal_destroy(&M[x]);
+		}
+	}
+
+	/*
+		Pick the window size from the bit count of the exponent `X`.
+	*/
+	x := internal_count_bits(X);
+	switch {
+	case x <= 7:
+		winsize = 2;
+	case x <= 36:
+		winsize = 3;
+	case x <= 140:
+		winsize = 4;
+	case x <= 450:
+		winsize = 5;
+	case x <= 1303:
+		winsize = 6;
+	case x <= 3529:
+		winsize = 7;
+	case:
+		winsize = 8;
+	}
+
+	winsize = min(_MAX_WIN_SIZE, winsize) if _MAX_WIN_SIZE > 0 else winsize; // A `_MAX_WIN_SIZE` of 0 means "no cap".
+
+	/*
+		Init M array.
+		Init the first used cell, M[1].
+	*/
+	internal_zero(&M[1])                                             or_return;
+
+	/*
+		Now init the upper half of the used table: M[2**(winsize - 1)] up to M[2**winsize - 1].
+	*/
+	for x = 1 << (winsize - 1); x < (1 << winsize); x += 1 {
+		internal_zero(&M[x])                                         or_return;
+	}
+
+	/*
+		Create `mu`, the precomputed setup value handed to `redux`.
+	*/
+	mu := &Int{};
+	defer internal_destroy(mu);
+	internal_zero(mu)                                                or_return;
+
+	if redmode == 0 {
+		internal_int_reduce_setup(mu, P)                             or_return;
+		redux = internal_int_reduce;
+	} else {
+		internal_int_reduce_2k_setup_l(mu, P)                        or_return;
+		redux = internal_int_reduce_2k_l;
+	}
+
+	/*
+		Create M table.
+
+		The M table contains powers of the base, e.g. M[x] = G**x mod P.
+		The first half of the table is not computed, though, except for M[1].
+	*/
+	internal_int_mod(&M[1], G, P)                                    or_return;
+
+	/*
+		Compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times.
+
+		TODO: This can probably be replaced by computing the power and using `pow` to raise to it
+		instead of repeated squaring.
+	*/
+	slot := 1 << (winsize - 1);
+	internal_copy(&M[slot], &M[1])                                   or_return;
+
+	for x = 0; x < int(winsize - 1); x += 1 {
+		/*
+			Square it.
+		*/
+		internal_sqr(&M[slot], &M[slot])                             or_return;
+
+		/*
+			Reduce modulo P
+		*/
+		redux(&M[slot], P, mu)                                       or_return;
+	}
+
+	/*
+		Create upper table, that is M[x] = M[x-1] * M[1] (mod P)
+		for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
+	*/
+	for x = slot + 1; x < (1 << winsize); x += 1 {
+		internal_mul(&M[x], &M[x - 1], &M[1])                        or_return;
+		redux(&M[x], P, mu)                                          or_return;
+	}
+
+	/*
+		Setup result.
+	*/
+	internal_one(res)                                                or_return;
+
+	/*
+		Set initial mode and bit cnt.
+	*/
+	mode   := 0; // 0: skipping leading zero bits, 1: squaring between windows, 2: filling a window.
+	bitcnt := 1; // Bits left in `buf`; starts at 1 so the first iteration loads a digit.
+	buf    := DIGIT(0); // Current digit of `X`, shifted left as its bits are consumed.
+	digidx := X.used - 1; // Index of the next digit of `X` to load, counting down to -1.
+	bitcpy := uint(0); // Number of bits collected in the current window.
+	bitbuf := DIGIT(0); // The collected window bits; used as the index into `M` once the window is full.
+
+	for {
+		/*
+			Grab next digit as required.
+		*/
+		bitcnt -= 1;
+		if bitcnt == 0 {
+			/*
+				If digidx == -1 we are out of digits.
+			*/
+			if digidx == -1 { break; }
+
+			/*
+				Read next digit and reset the bitcnt.
+			*/
+			buf    = X.digit[digidx];
+			digidx -= 1;
+			bitcnt = _DIGIT_BITS;
+		}
+
+		/*
+			Grab the next msb from the exponent.
+		*/
+		y := buf >> (_DIGIT_BITS - 1) & 1;
+		buf <<= 1;
+
+		/*
+			If the bit is zero and mode == 0 then we ignore it.
+			These represent the leading zero bits before the first 1 bit
+			in the exponent.  Technically this optimization is not required but it
+			does lower the # of trivial squaring/reductions used.
+		*/
+		if mode == 0 && y == 0 {
+			continue;
+		}
+
+		/*
+			If the bit is zero and mode == 1 then we square.
+		*/
+		if mode == 1 && y == 0 {
+			internal_sqr(res, res)                                   or_return;
+			redux(res, P, mu)                                        or_return;
+			continue;
+		}
+
+		/*
+			Else we add it to the window.
+		*/
+		bitcpy += 1;
+		bitbuf |= (y << (winsize - bitcpy));
+		mode    = 2;
+
+		if (bitcpy == winsize) {
+			/*
+				Window is filled so square as required and multiply.
+				Square first.
+			*/
+			for x = 0; x < int(winsize); x += 1 {
+				internal_sqr(res, res)                               or_return;
+				redux(res, P, mu)                                    or_return;
+			}
+
+			/*
+				Then multiply.
+			*/
+			internal_mul(res, res, &M[bitbuf])                       or_return;
+			redux(res, P, mu)                                        or_return;
+
+			/*
+				Empty window and reset.
+			*/
+			bitcpy = 0;
+			bitbuf = 0;
+			mode   = 1;
+		}
+	}
+
+	/*
+		If bits remain in a partially filled window then square/multiply bit by bit.
+	*/
+	if mode == 2 && bitcpy > 0 {
+		/*
+			Square then multiply if the bit is set.
+		*/
+		for x = 0; x < int(bitcpy); x += 1 {
+			internal_sqr(res, res)                                   or_return;
+			redux(res, P, mu)                                        or_return;
+
+			bitbuf <<= 1;
+			if ((bitbuf & (1 << winsize)) != 0) {
+				/*
+					Then multiply.
+				*/
+				internal_mul(res, res, &M[1])                        or_return;
+				redux(res, P, mu)                                    or_return;
+			}
+		}
+	}
+	return err;
+}
 
 /*
 	Returns the number of Rabin-Miller trials needed for a given bit size.
diff --git a/core/math/big/private.odin b/core/math/big/private.odin
index d71946ce2..7e839337f 100644
--- a/core/math/big/private.odin
+++ b/core/math/big/private.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -17,6 +15,7 @@ package math_big
 
 	These aren't exported for the same reasons.
 */
+package math_big
 
 import "core:intrinsics"
 import "core:mem"
diff --git a/core/math/big/public.odin b/core/math/big/public.odin
index 542725289..d69b3ba22 100644
--- a/core/math/big/public.odin
+++ b/core/math/big/public.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -10,6 +8,7 @@ package math_big
 
 	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
 */
+package math_big
 
 /*
 	===========================
diff --git a/core/math/big/radix.odin b/core/math/big/radix.odin
index acf0bacbd..8a7040158 100644
--- a/core/math/big/radix.odin
+++ b/core/math/big/radix.odin
@@ -1,5 +1,3 @@
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -14,6 +12,7 @@ package math_big
 		- Use Barrett reduction for non-powers-of-two.
 		- Also look at extracting and splatting several digits at once.
 */
+package math_big
 
 import "core:intrinsics"
 import "core:mem"
diff --git a/core/math/big/test.odin b/core/math/big/test.odin
index ea3c6be49..8d60fc5ee 100644
--- a/core/math/big/test.odin
+++ b/core/math/big/test.odin
@@ -1,6 +1,4 @@
 //+ignore
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -11,6 +9,7 @@ package math_big
 
 	This file exports procedures for use with the test.py test suite.
 */
+package math_big
 
 /*
 	TODO: Write tests for `internal_*` and test reusing parameters with the public implementations.
diff --git a/core/math/big/test.py b/core/math/big/test.py
index df59fa1c8..e095b061e 100644
--- a/core/math/big/test.py
+++ b/core/math/big/test.py
@@ -1,3 +1,12 @@
+#
+#	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+#	Made available under Odin's BSD-3 license.
+#
+#	A BigInt implementation in Odin.
+#	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+#	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+#
+
 from ctypes import *
 from random import *
 import math
diff --git a/core/math/big/tune.odin b/core/math/big/tune.odin
index 700a5e74a..3381065bb 100644
--- a/core/math/big/tune.odin
+++ b/core/math/big/tune.odin
@@ -1,6 +1,4 @@
 //+ignore
-package math_big
-
 /*
 	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
 	Made available under Odin's BSD-3 license.
@@ -9,6 +7,7 @@ package math_big
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 */
+package math_big
 
 import "core:fmt"
 import "core:time"
author	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2021-08-31 16:43:07 +0200
committer	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2021-09-01 19:13:47 +0200
commit	65a15e9c060d74bc3a7977c8c3329ec43dc810b2 (patch)
tree	012c4e36e098b0e964e4817c16e70eb5785f9005 /core
parent	c3a70ac277494b70e86578f1ce31923a0ca8d2c8 (diff)