diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-08-11 10:50:59 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-08-11 20:59:54 +0200 |
| commit | 5f34ff9f9f6bdbc62d6111f85167229ac8440747 (patch) | |
| tree | 035233637be0b132c8dafef004f0d806ff0814a1 /core/math/big | |
| parent | 2b274fefbb214533399b804acce8977ecaa3afeb (diff) | |
big: Add `_private_int_sqr_toom`.
Diffstat (limited to 'core/math/big')
| -rw-r--r-- | core/math/big/common.odin | 3 | ||||
| -rw-r--r-- | core/math/big/example.odin | 14 | ||||
| -rw-r--r-- | core/math/big/internal.odin | 5 | ||||
| -rw-r--r-- | core/math/big/private.odin | 134 |
4 files changed, 131 insertions, 25 deletions
diff --git a/core/math/big/common.odin b/core/math/big/common.odin index 56b8a1083..a0cddb88e 100644 --- a/core/math/big/common.odin +++ b/core/math/big/common.odin @@ -40,6 +40,9 @@ SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF; It would also be cool if we collected some data across various processor families. This would let uss set reasonable defaults at runtime as this library initializes itself by using `cpuid` or the ARM equivalent. + + IMPORTANT: The 32_BIT path has largely gone untested. It needs to be tested and + debugged where necessary. */ _DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF, 80); diff --git a/core/math/big/example.odin b/core/math/big/example.odin index 1d9a4cb3c..467c4d517 100644 --- a/core/math/big/example.odin +++ b/core/math/big/example.odin @@ -82,24 +82,22 @@ demo :: proc() { // if err = factorial(a, 850); err != nil { fmt.printf("factorial err: %v\n", err); return; } - foo := "615037959146039477924633848896619112832171971562900618409305032006863881436080"; - if err = atoi(a, foo, 10); err != nil { return; } - print("a: ", a, 10, true, true, true); - fmt.println(); + foo := "54fc32611b510b653a608aaf41ca6d8e6a927520c87124d6bc9df29e29dcafbb0096428ca4a48905a3b6c9f02c56983d6d14711e7f5ce433feca8fcf382cdbd76cd627c45bc55423b8aea7f1bf638e81a3182ccd8b937467ca3916b37e67d0d4a1f3a0400360e8b02211a61071549525c4a1d4b32bfc83381e00d7d977bcf8f76e74d7a5a9532b75adfe67b6511cb377fa2828f3d9f989b3a532e2ded695796052ec3073267c11270fd393087a0ddf02f480f31149ee0c889811a8e43c25b906c9be5627bab8ba8eeba80ebdbfa0c6fe988542398d17f9df13887ddf5b109fc70033b325ec79340bd3e8d0e9217d0095fa1d5ed8750b479e2f85dd15bba5ce8ab9376d7fba183435d6d7b67e244358464efea9f5f3311efca81e36a5875e484cba3bce9536c87a038a16b85817812afde4ac8592af4f0a34ecb2e35ec160755ae17c9ad5ebee8f8a3687a06ce6a8385c161c275d1f1c1e10b64eeab5a56d76d5574c7a19a177c4018ec61f85f636697f177160452535de3a751c61f2156cd33a61e4b290b79429286397f6e58ee4936d4f1fd911b6511215fc9690e21b6f0f0c315341d5a192c40d6543c330a92c2161639f915ae50751ecdfcf6b5a489b4a874867a559e962c5464f69e50916c7621d8c7941357883e0ac5ba44dcf5cfccaa6a83ae035d66d9f00de1f115ceae4bb87133ab03d960b1c29ec801c2ac880b65d6ffd676b2c0a0f32282c47cfa977fa03ba94939cc6ec8666be46e8ce6d8a473089fd313dad75bdf99b024ad2b2bec434a2bde6102adee4c10fc53836fcade7e5eeef09a1dbeec913e535ba39b05035316e81723b93d465bd952bf1faada1564111836f022c482be52568f0c061bf0666db8c33e7a4f05c5792d0914b4ef7b654735bc1d363ca15af78ad32c0db6247b5721517393d47ce267f3a7e888642c7f595d7c8e84f680d766551bde0f9002f040a688973d3498c0ab4443b33652639018d33e0e9fe97831ba1bf7531b4d69f84e1a99cd58105756dd78ba9524ed4d236387ce9211ecab98048a728f5526c44372e44ed7a6853eeacda09d659119b0082cec1c1a6295fd9c6439f95dc60a9467e4296c0babb0138dae22ef64716fbaf0cbcd4c3047b976ae923f75fc6f4303e2ffe2792ff367d48b82b162182700bd08622a0b3304a6b0e1154ae66dc3a1607fc03026a1b81a7ec904aa98c201551baf5da48d78916ab5acb88df7bc57d7b4f1e96716dbfe56366c20cd28c2ae9d007c7c65c7fb4b6ccf108d6f23215f27913bb57ed13f9a75fb017ca5242a46ef2248da0b60ca1cb5cd80219367ed61082b3ca6c07ae581430c334de073e241993cc7458be8017c4d8c1a2b9d9d2e4e72fff048c2a70b2f57e8259a0dac6cdab9dd4d7bcf69b401d52a67a656d62730672fa3b05aa83fbc97a2362bb6c218d9c659dbfd64e20cc7f0977ba2c2ad695202fee68aca07698d3e4a9677d8ea69099d1ccb113ddb695017d6ce0da36fa3c8f3fd22ad400f53335e1e965d3f3323575927273987cb9e798b222e125c4290a4bf751b8e5329a6690b32bedf6f90786511d55da1dabe963cff83686f454b1b1fa28c46abc20c4b500d9ecad4b3f3c446fb74eebb596aea1c6f079b43f167f228cab3ceab8965c34d5b0c760221ca441b6d1d75b5c39d7443fc58933bc2cadb1c5d1b92ac70179677feef4ee63a8f8fb76eeb20e705b58f138867db80bfaf4e2edba0a7f68e56e4650b2d6e8fdf634d74f7ba8df527fd3c3a03a987e9b73d2e50aea3020251c06d5629dd32790bc36f02c638b757441a813c101f3d81eb6a4d1edd147ba9dbbc2d3c419fa11bda7f3602dda5dc2f9a19fc7306660d20ecb1da3bb87b8415e340f2a3d1e843bc19059734e2417f1783c7859f00c6801dc51acf068d57777de7d100ae77bca7614a69efe557d574abb2b79d5d90ba621aa2b6460cdce64012cad33bf374989044bba0d280c26b71a1a2d7abf7bd75009a71db9f488c4b3b58db217689bc355d68817f6153736f597e3586780175960edf4fded6513aa8c6154cee94e278e9967d0f256a4b14a2cd188e4800f146118a35633476c665edc7460658dc7877f107bcd3108d"; + if err = atoi(a, foo, 16); err != nil { return; } + //print("a: ", a, 16, true, true, true); + //fmt.println(); { SCOPED_TIMING(.sqr); - if err = sqr(b, a); err != nil { fmt.printf("sqr err: %v\n", err); return; } + if err = _private_int_sqr_toom(b, a); err != nil { fmt.printf("sqr err: %v\n", err); return; } } fmt.println(); - print("b _sqr_karatsuba: ", b); - fmt.println(); bs, err = itoa(b, 16); defer delete(bs); if bs[:50] != "1C367982F3050A8A3C62A8A7906D165438B54B287AF3F15D36" { - fmt.println("sqr failed"); + fmt.println("sqr failed"); } } diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin index 00b1ed7bf..aac2db9ec 100644 --- a/core/math/big/internal.odin +++ b/core/math/big/internal.odin @@ -36,8 +36,6 @@ import "core:mem" import "core:intrinsics" import rnd "core:math/rand" -// import "core:fmt" - /* Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7. @@ -630,8 +628,7 @@ internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.alloc /* Use Toom-Cook? */ - // fmt.printf("_private_int_sqr_toom: %v\n", src.used); - err = #force_inline _private_int_sqr_karatsuba(dest, src); + err = #force_inline _private_int_sqr_toom(dest, src); } else if src.used >= SQR_KARATSUBA_CUTOFF { /* Karatsuba? diff --git a/core/math/big/private.odin b/core/math/big/private.odin index 3d8497c72..9b5677bb0 100644 --- a/core/math/big/private.odin +++ b/core/math/big/private.odin @@ -387,37 +387,145 @@ _private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocat x0.used = B;
x1.used = src.used - B;
- internal_copy_digits(x0, src, x0.used);
+ #force_inline internal_copy_digits(x0, src, x0.used);
#force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used);
- internal_clamp(x0);
+ #force_inline internal_clamp(x0);
/*
Now calc the products x0*x0 and x1*x1.
*/
- if err = internal_sqr(x0x0, x0); err != nil { return err; }
- if err = internal_sqr(x1x1, x1); err != nil { return err; }
+ if err = internal_sqr(x0x0, x0); err != nil { return err; }
+ if err = internal_sqr(x1x1, x1); err != nil { return err; }
/*
Now calc (x1+x0)^2
*/
- if err = internal_add(t1, x0, x1); err != nil { return err; }
- if err = internal_sqr(t1, t1); err != nil { return err; }
+ if err = internal_add(t1, x0, x1); err != nil { return err; }
+ if err = internal_sqr(t1, t1); err != nil { return err; }
/*
Add x0y0
*/
- if err = internal_add(t2, x0x0, x1x1); err != nil { return err; }
- if err = internal_sub(t1, t1, t2); err != nil { return err; }
+ if err = internal_add(t2, x0x0, x1x1); err != nil { return err; }
+ if err = internal_sub(t1, t1, t2); err != nil { return err; }
/*
Shift by B.
*/
- if err = internal_shl_digit(t1, B); err != nil { return err; }
- if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; }
- if err = internal_add(t1, t1, x0x0); err != nil { return err; }
- if err = internal_add(dest, t1, x1x1); err != nil { return err; }
+ if err = internal_shl_digit(t1, B); err != nil { return err; }
+ if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; }
+ if err = internal_add(t1, t1, x0x0); err != nil { return err; }
+ if err = internal_add(dest, t1, x1x1); err != nil { return err; }
- return internal_clamp(dest);
+ return #force_inline internal_clamp(dest);
+}
+
+/*
+ Squaring using Toom-Cook 3-way algorithm.
+
+ Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
+ 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
+*/
+_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+ context.allocator = allocator;
+
+ S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{};
+ defer destroy(S0, a0, a1, a2);
+
+ /*
+ Init temps.
+ */
+ if err = internal_zero(S0); err != nil { return err; }
+
+ /*
+ B
+ */
+ B := src.used / 3;
+
+ /*
+ a = a2 * x^2 + a1 * x + a0;
+ */
+ if err = internal_grow(a0, B); err != nil { return err; }
+ if err = internal_grow(a1, B); err != nil { return err; }
+ if err = internal_grow(a2, src.used - (2 * B)); err != nil { return err; }
+
+ a0.used = B;
+ a1.used = B;
+ a2.used = src.used - 2 * B;
+
+ #force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[ 0], size_of(DIGIT) * a0.used);
+ #force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[ B], size_of(DIGIT) * a1.used);
+ #force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used);
+
+ internal_clamp(a0);
+ internal_clamp(a1);
+ internal_clamp(a2);
+
+ /** S0 = a0^2; */
+ if err = internal_sqr(S0, a0); err != nil { return err; }
+
+ /** \\S1 = (a2 + a1 + a0)^2 */
+ /** \\S2 = (a2 - a1 + a0)^2 */
+ /** \\S1 = a0 + a2; */
+ /** a0 = a0 + a2; */
+ if err = internal_add(a0, a0, a2); err != nil { return err; }
+ /** \\S2 = S1 - a1; */
+ /** b = a0 - a1; */
+ if err = internal_sub(dest, a0, a1); err != nil { return err; }
+ /** \\S1 = S1 + a1; */
+ /** a0 = a0 + a1; */
+ if err = internal_add(a0, a0, a1); err != nil { return err; }
+ /** \\S1 = S1^2; */
+ /** a0 = a0^2; */
+ if err = internal_sqr(a0, a0); err != nil { return err; }
+ /** \\S2 = S2^2; */
+ /** b = b^2; */
+ if err = internal_sqr(dest, dest); err != nil { return err; }
+ /** \\ S3 = 2 * a1 * a2 */
+ /** \\S3 = a1 * a2; */
+ /** a1 = a1 * a2; */
+ if err = internal_mul(a1, a1, a2); err != nil { return err; }
+ /** \\S3 = S3 << 1; */
+ /** a1 = a1 << 1; */
+ if err = internal_shl(a1, a1, 1); err != nil { return err; }
+ /** \\S4 = a2^2; */
+ /** a2 = a2^2; */
+ if err = internal_sqr(a2, a2); err != nil { return err; }
+ /** \\ tmp = (S1 + S2)/2 */
+ /** \\tmp = S1 + S2; */
+ /** b = a0 + b; */
+ if err = internal_add(dest, a0, dest); err != nil { return err; }
+ /** \\tmp = tmp >> 1; */
+ /** b = b >> 1; */
+ if err = internal_shr(dest, dest, 1); err != nil { return err; }
+ /** \\ S1 = S1 - tmp - S3 */
+ /** \\S1 = S1 - tmp; */
+ /** a0 = a0 - b; */
+ if err = internal_sub(a0, a0, dest); err != nil { return err; }
+ /** \\S1 = S1 - S3; */
+ /** a0 = a0 - a1; */
+ if err = internal_sub(a0, a0, a1); err != nil { return err; }
+ /** \\S2 = tmp - S4 -S0 */
+ /** \\S2 = tmp - S4; */
+ /** b = b - a2; */
+ if err = internal_sub(dest, dest, a2); err != nil { return err; }
+ /** \\S2 = S2 - S0; */
+ /** b = b - S0; */
+ if err = internal_sub(dest, dest, S0); err != nil { return err; }
+ /** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */
+ /** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */
+ if err = internal_shl_digit( a2, 4 * B); err != nil { return err; }
+ if err = internal_shl_digit( a1, 3 * B); err != nil { return err; }
+ if err = internal_shl_digit(dest, 2 * B); err != nil { return err; }
+ if err = internal_shl_digit( a0, 1 * B); err != nil { return err; }
+
+ if err = internal_add(a2, a2, a1); err != nil { return err; }
+ if err = internal_add(dest, dest, a2); err != nil { return err; }
+ if err = internal_add(dest, dest, a0); err != nil { return err; }
+ if err = internal_add(dest, dest, S0); err != nil { return err; }
+ /** a^2 - P */
+
+ return #force_inline internal_clamp(dest);
}
/*
|