aboutsummaryrefslogtreecommitdiff
path: root/core/math/big
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-08-11 10:50:59 +0200
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-08-11 20:59:54 +0200
commit5f34ff9f9f6bdbc62d6111f85167229ac8440747 (patch)
tree035233637be0b132c8dafef004f0d806ff0814a1 /core/math/big
parent2b274fefbb214533399b804acce8977ecaa3afeb (diff)
big: Add `_private_int_sqr_toom`.
Diffstat (limited to 'core/math/big')
-rw-r--r--core/math/big/common.odin3
-rw-r--r--core/math/big/example.odin14
-rw-r--r--core/math/big/internal.odin5
-rw-r--r--core/math/big/private.odin134
4 files changed, 131 insertions, 25 deletions
diff --git a/core/math/big/common.odin b/core/math/big/common.odin
index 56b8a1083..a0cddb88e 100644
--- a/core/math/big/common.odin
+++ b/core/math/big/common.odin
@@ -40,6 +40,9 @@ SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF;
It would also be cool if we collected some data across various processor families.
This would let uss set reasonable defaults at runtime as this library initializes
itself by using `cpuid` or the ARM equivalent.
+
+ IMPORTANT: The 32_BIT path has largely gone untested. It needs to be tested and
+ debugged where necessary.
*/
_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF, 80);
diff --git a/core/math/big/example.odin b/core/math/big/example.odin
index 1d9a4cb3c..467c4d517 100644
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -82,24 +82,22 @@ demo :: proc() {
// if err = factorial(a, 850); err != nil { fmt.printf("factorial err: %v\n", err); return; }
- foo := "615037959146039477924633848896619112832171971562900618409305032006863881436080";
- if err = atoi(a, foo, 10); err != nil { return; }
- print("a: ", a, 10, true, true, true);
- fmt.println();
+ foo := "54fc32611b510b653a608aaf41ca6d8e6a927520c87124d6bc9df29e29dcafbb0096428ca4a48905a3b6c9f02c56983d6d14711e7f5ce433feca8fcf382cdbd76cd627c45bc55423b8aea7f1bf638e81a3182ccd8b937467ca3916b37e67d0d4a1f3a0400360e8b02211a61071549525c4a1d4b32bfc83381e00d7d977bcf8f76e74d7a5a9532b75adfe67b6511cb377fa2828f3d9f989b3a532e2ded695796052ec3073267c11270fd393087a0ddf02f480f31149ee0c889811a8e43c25b906c9be5627bab8ba8eeba80ebdbfa0c6fe988542398d17f9df13887ddf5b109fc70033b325ec79340bd3e8d0e9217d0095fa1d5ed8750b479e2f85dd15bba5ce8ab9376d7fba183435d6d7b67e244358464efea9f5f3311efca81e36a5875e484cba3bce9536c87a038a16b85817812afde4ac8592af4f0a34ecb2e35ec160755ae17c9ad5ebee8f8a3687a06ce6a8385c161c275d1f1c1e10b64eeab5a56d76d5574c7a19a177c4018ec61f85f636697f177160452535de3a751c61f2156cd33a61e4b290b79429286397f6e58ee4936d4f1fd911b6511215fc9690e21b6f0f0c315341d5a192c40d6543c330a92c2161639f915ae50751ecdfcf6b5a489b4a874867a559e962c5464f69e50916c7621d8c7941357883e0ac5ba44dcf5cfccaa6a83ae035d66d9f00de1f115ceae4bb87133ab03d960b1c29ec801c2ac880b65d6ffd676b2c0a0f32282c47cfa977fa03ba94939cc6ec8666be46e8ce6d8a473089fd313dad75bdf99b024ad2b2bec434a2bde6102adee4c10fc53836fcade7e5eeef09a1dbeec913e535ba39b05035316e81723b93d465bd952bf1faada1564111836f022c482be52568f0c061bf0666db8c33e7a4f05c5792d0914b4ef7b654735bc1d363ca15af78ad32c0db6247b5721517393d47ce267f3a7e888642c7f595d7c8e84f680d766551bde0f9002f040a688973d3498c0ab4443b33652639018d33e0e9fe97831ba1bf7531b4d69f84e1a99cd58105756dd78ba9524ed4d236387ce9211ecab98048a728f5526c44372e44ed7a6853eeacda09d659119b0082cec1c1a6295fd9c6439f95dc60a9467e4296c0babb0138dae22ef64716fbaf0cbcd4c3047b976ae923f75fc6f4303e2ffe2792ff367d48b82b162182700bd08622a0b3304a6b0e1154ae66dc3a1607fc03026a1b81a7ec904aa98c201551baf5da48d78916ab5acb88df7bc57d7b4f1e96716dbfe56366c20cd28c2ae9d007c7c65c7fb4b6ccf108d6f23215f27913bb57ed13f9a75fb017ca5242a46ef2248da0b60ca1cb5cd80219367ed61082b3ca6c07ae581430c334de073e241993cc7458be8017c4d8c1a2b9d9d2e4e72fff048c2a70b2f57e8259a0dac6cdab9dd4d7bcf69b401d52a67a656d62730672fa3b05aa83fbc97a2362bb6c218d9c659dbfd64e20cc7f0977ba2c2ad695202fee68aca07698d3e4a9677d8ea69099d1ccb113ddb695017d6ce0da36fa3c8f3fd22ad400f53335e1e965d3f3323575927273987cb9e798b222e125c4290a4bf751b8e5329a6690b32bedf6f90786511d55da1dabe963cff83686f454b1b1fa28c46abc20c4b500d9ecad4b3f3c446fb74eebb596aea1c6f079b43f167f228cab3ceab8965c34d5b0c760221ca441b6d1d75b5c39d7443fc58933bc2cadb1c5d1b92ac70179677feef4ee63a8f8fb76eeb20e705b58f138867db80bfaf4e2edba0a7f68e56e4650b2d6e8fdf634d74f7ba8df527fd3c3a03a987e9b73d2e50aea3020251c06d5629dd32790bc36f02c638b757441a813c101f3d81eb6a4d1edd147ba9dbbc2d3c419fa11bda7f3602dda5dc2f9a19fc7306660d20ecb1da3bb87b8415e340f2a3d1e843bc19059734e2417f1783c7859f00c6801dc51acf068d57777de7d100ae77bca7614a69efe557d574abb2b79d5d90ba621aa2b6460cdce64012cad33bf374989044bba0d280c26b71a1a2d7abf7bd75009a71db9f488c4b3b58db217689bc355d68817f6153736f597e3586780175960edf4fded6513aa8c6154cee94e278e9967d0f256a4b14a2cd188e4800f146118a35633476c665edc7460658dc7877f107bcd3108d";
+ if err = atoi(a, foo, 16); err != nil { return; }
+ //print("a: ", a, 16, true, true, true);
+ //fmt.println();
{
SCOPED_TIMING(.sqr);
- if err = sqr(b, a); err != nil { fmt.printf("sqr err: %v\n", err); return; }
+ if err = _private_int_sqr_toom(b, a); err != nil { fmt.printf("sqr err: %v\n", err); return; }
}
fmt.println();
- print("b _sqr_karatsuba: ", b);
- fmt.println();
bs, err = itoa(b, 16);
defer delete(bs);
if bs[:50] != "1C367982F3050A8A3C62A8A7906D165438B54B287AF3F15D36" {
- fmt.println("sqr failed");
+ fmt.println("sqr failed");
}
}
diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin
index 00b1ed7bf..aac2db9ec 100644
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -36,8 +36,6 @@ import "core:mem"
import "core:intrinsics"
import rnd "core:math/rand"
-// import "core:fmt"
-
/*
Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7.
@@ -630,8 +628,7 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc
/*
Use Toom-Cook?
*/
- // fmt.printf("_private_int_sqr_toom: %v\n", src.used);
- err = #force_inline _private_int_sqr_karatsuba(dest, src);
+ err = #force_inline _private_int_sqr_toom(dest, src);
} else if src.used >= SQR_KARATSUBA_CUTOFF {
/*
Karatsuba?
diff --git a/core/math/big/private.odin b/core/math/big/private.odin
index 3d8497c72..9b5677bb0 100644
--- a/core/math/big/private.odin
+++ b/core/math/big/private.odin
@@ -387,37 +387,145 @@ _private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocat
x0.used = B;
x1.used = src.used - B;
- internal_copy_digits(x0, src, x0.used);
+ #force_inline internal_copy_digits(x0, src, x0.used);
#force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used);
- internal_clamp(x0);
+ #force_inline internal_clamp(x0);
/*
Now calc the products x0*x0 and x1*x1.
*/
- if err = internal_sqr(x0x0, x0); err != nil { return err; }
- if err = internal_sqr(x1x1, x1); err != nil { return err; }
+ if err = internal_sqr(x0x0, x0); err != nil { return err; }
+ if err = internal_sqr(x1x1, x1); err != nil { return err; }
/*
Now calc (x1+x0)^2
*/
- if err = internal_add(t1, x0, x1); err != nil { return err; }
- if err = internal_sqr(t1, t1); err != nil { return err; }
+ if err = internal_add(t1, x0, x1); err != nil { return err; }
+ if err = internal_sqr(t1, t1); err != nil { return err; }
/*
Add x0y0
*/
- if err = internal_add(t2, x0x0, x1x1); err != nil { return err; }
- if err = internal_sub(t1, t1, t2); err != nil { return err; }
+ if err = internal_add(t2, x0x0, x1x1); err != nil { return err; }
+ if err = internal_sub(t1, t1, t2); err != nil { return err; }
/*
Shift by B.
*/
- if err = internal_shl_digit(t1, B); err != nil { return err; }
- if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; }
- if err = internal_add(t1, t1, x0x0); err != nil { return err; }
- if err = internal_add(dest, t1, x1x1); err != nil { return err; }
+ if err = internal_shl_digit(t1, B); err != nil { return err; }
+ if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; }
+ if err = internal_add(t1, t1, x0x0); err != nil { return err; }
+ if err = internal_add(dest, t1, x1x1); err != nil { return err; }
- return internal_clamp(dest);
+ return #force_inline internal_clamp(dest);
+}
+
+/*
+ Squaring using Toom-Cook 3-way algorithm.
+
+ Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
+ 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
+*/
+_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+ context.allocator = allocator;
+
+ S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{};
+ defer destroy(S0, a0, a1, a2);
+
+ /*
+ Init temps.
+ */
+ if err = internal_zero(S0); err != nil { return err; }
+
+ /*
+ B
+ */
+ B := src.used / 3;
+
+ /*
+ a = a2 * x^2 + a1 * x + a0;
+ */
+ if err = internal_grow(a0, B); err != nil { return err; }
+ if err = internal_grow(a1, B); err != nil { return err; }
+ if err = internal_grow(a2, src.used - (2 * B)); err != nil { return err; }
+
+ a0.used = B;
+ a1.used = B;
+ a2.used = src.used - 2 * B;
+
+ #force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[ 0], size_of(DIGIT) * a0.used);
+ #force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[ B], size_of(DIGIT) * a1.used);
+ #force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used);
+
+ internal_clamp(a0);
+ internal_clamp(a1);
+ internal_clamp(a2);
+
+ /** S0 = a0^2; */
+ if err = internal_sqr(S0, a0); err != nil { return err; }
+
+ /** \\S1 = (a2 + a1 + a0)^2 */
+ /** \\S2 = (a2 - a1 + a0)^2 */
+ /** \\S1 = a0 + a2; */
+ /** a0 = a0 + a2; */
+ if err = internal_add(a0, a0, a2); err != nil { return err; }
+ /** \\S2 = S1 - a1; */
+ /** b = a0 - a1; */
+ if err = internal_sub(dest, a0, a1); err != nil { return err; }
+ /** \\S1 = S1 + a1; */
+ /** a0 = a0 + a1; */
+ if err = internal_add(a0, a0, a1); err != nil { return err; }
+ /** \\S1 = S1^2; */
+ /** a0 = a0^2; */
+ if err = internal_sqr(a0, a0); err != nil { return err; }
+ /** \\S2 = S2^2; */
+ /** b = b^2; */
+ if err = internal_sqr(dest, dest); err != nil { return err; }
+ /** \\ S3 = 2 * a1 * a2 */
+ /** \\S3 = a1 * a2; */
+ /** a1 = a1 * a2; */
+ if err = internal_mul(a1, a1, a2); err != nil { return err; }
+ /** \\S3 = S3 << 1; */
+ /** a1 = a1 << 1; */
+ if err = internal_shl(a1, a1, 1); err != nil { return err; }
+ /** \\S4 = a2^2; */
+ /** a2 = a2^2; */
+ if err = internal_sqr(a2, a2); err != nil { return err; }
+ /** \\ tmp = (S1 + S2)/2 */
+ /** \\tmp = S1 + S2; */
+ /** b = a0 + b; */
+ if err = internal_add(dest, a0, dest); err != nil { return err; }
+ /** \\tmp = tmp >> 1; */
+ /** b = b >> 1; */
+ if err = internal_shr(dest, dest, 1); err != nil { return err; }
+ /** \\ S1 = S1 - tmp - S3 */
+ /** \\S1 = S1 - tmp; */
+ /** a0 = a0 - b; */
+ if err = internal_sub(a0, a0, dest); err != nil { return err; }
+ /** \\S1 = S1 - S3; */
+ /** a0 = a0 - a1; */
+ if err = internal_sub(a0, a0, a1); err != nil { return err; }
+ /** \\S2 = tmp - S4 -S0 */
+ /** \\S2 = tmp - S4; */
+ /** b = b - a2; */
+ if err = internal_sub(dest, dest, a2); err != nil { return err; }
+ /** \\S2 = S2 - S0; */
+ /** b = b - S0; */
+ if err = internal_sub(dest, dest, S0); err != nil { return err; }
+ /** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */
+ /** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */
+ if err = internal_shl_digit( a2, 4 * B); err != nil { return err; }
+ if err = internal_shl_digit( a1, 3 * B); err != nil { return err; }
+ if err = internal_shl_digit(dest, 2 * B); err != nil { return err; }
+ if err = internal_shl_digit( a0, 1 * B); err != nil { return err; }
+
+ if err = internal_add(a2, a2, a1); err != nil { return err; }
+ if err = internal_add(dest, dest, a2); err != nil { return err; }
+ if err = internal_add(dest, dest, a0); err != nil { return err; }
+ if err = internal_add(dest, dest, S0); err != nil { return err; }
+ /** a^2 - P */
+
+ return #force_inline internal_clamp(dest);
}
/*