aboutsummaryrefslogtreecommitdiff
path: root/core/math/big/private.odin
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-08-11 10:50:59 +0200
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-08-11 20:59:54 +0200
commit5f34ff9f9f6bdbc62d6111f85167229ac8440747 (patch)
tree035233637be0b132c8dafef004f0d806ff0814a1 /core/math/big/private.odin
parent2b274fefbb214533399b804acce8977ecaa3afeb (diff)
big: Add `_private_int_sqr_toom`.
Diffstat (limited to 'core/math/big/private.odin')
-rw-r--r--core/math/big/private.odin134
1 files changed, 121 insertions, 13 deletions
diff --git a/core/math/big/private.odin b/core/math/big/private.odin
index 3d8497c72..9b5677bb0 100644
--- a/core/math/big/private.odin
+++ b/core/math/big/private.odin
@@ -387,37 +387,145 @@ _private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocat
x0.used = B;
x1.used = src.used - B;
- internal_copy_digits(x0, src, x0.used);
+ #force_inline internal_copy_digits(x0, src, x0.used);
#force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used);
- internal_clamp(x0);
+ #force_inline internal_clamp(x0);
/*
Now calc the products x0*x0 and x1*x1.
*/
- if err = internal_sqr(x0x0, x0); err != nil { return err; }
- if err = internal_sqr(x1x1, x1); err != nil { return err; }
+ if err = internal_sqr(x0x0, x0); err != nil { return err; }
+ if err = internal_sqr(x1x1, x1); err != nil { return err; }
/*
Now calc (x1+x0)^2
*/
- if err = internal_add(t1, x0, x1); err != nil { return err; }
- if err = internal_sqr(t1, t1); err != nil { return err; }
+ if err = internal_add(t1, x0, x1); err != nil { return err; }
+ if err = internal_sqr(t1, t1); err != nil { return err; }
/*
Add x0y0
*/
- if err = internal_add(t2, x0x0, x1x1); err != nil { return err; }
- if err = internal_sub(t1, t1, t2); err != nil { return err; }
+ if err = internal_add(t2, x0x0, x1x1); err != nil { return err; }
+ if err = internal_sub(t1, t1, t2); err != nil { return err; }
/*
Shift by B.
*/
- if err = internal_shl_digit(t1, B); err != nil { return err; }
- if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; }
- if err = internal_add(t1, t1, x0x0); err != nil { return err; }
- if err = internal_add(dest, t1, x1x1); err != nil { return err; }
+ if err = internal_shl_digit(t1, B); err != nil { return err; }
+ if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; }
+ if err = internal_add(t1, t1, x0x0); err != nil { return err; }
+ if err = internal_add(dest, t1, x1x1); err != nil { return err; }
- return internal_clamp(dest);
+ return #force_inline internal_clamp(dest);
+}
+
+/*
+ Squaring using Toom-Cook 3-way algorithm.
+
+ Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
+ 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
+*/
+_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) {
+ context.allocator = allocator;
+
+ S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{};
+ defer destroy(S0, a0, a1, a2);
+
+ /*
+ Init temps.
+ */
+ if err = internal_zero(S0); err != nil { return err; }
+
+ /*
+ B
+ */
+ B := src.used / 3;
+
+ /*
+ a = a2 * x^2 + a1 * x + a0;
+ */
+ if err = internal_grow(a0, B); err != nil { return err; }
+ if err = internal_grow(a1, B); err != nil { return err; }
+ if err = internal_grow(a2, src.used - (2 * B)); err != nil { return err; }
+
+ a0.used = B;
+ a1.used = B;
+ a2.used = src.used - 2 * B;
+
+ #force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[ 0], size_of(DIGIT) * a0.used);
+ #force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[ B], size_of(DIGIT) * a1.used);
+ #force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used);
+
+ internal_clamp(a0);
+ internal_clamp(a1);
+ internal_clamp(a2);
+
+ /** S0 = a0^2; */
+ if err = internal_sqr(S0, a0); err != nil { return err; }
+
+ /** \\S1 = (a2 + a1 + a0)^2 */
+ /** \\S2 = (a2 - a1 + a0)^2 */
+ /** \\S1 = a0 + a2; */
+ /** a0 = a0 + a2; */
+ if err = internal_add(a0, a0, a2); err != nil { return err; }
+ /** \\S2 = S1 - a1; */
+ /** b = a0 - a1; */
+ if err = internal_sub(dest, a0, a1); err != nil { return err; }
+ /** \\S1 = S1 + a1; */
+ /** a0 = a0 + a1; */
+ if err = internal_add(a0, a0, a1); err != nil { return err; }
+ /** \\S1 = S1^2; */
+ /** a0 = a0^2; */
+ if err = internal_sqr(a0, a0); err != nil { return err; }
+ /** \\S2 = S2^2; */
+ /** b = b^2; */
+ if err = internal_sqr(dest, dest); err != nil { return err; }
+ /** \\ S3 = 2 * a1 * a2 */
+ /** \\S3 = a1 * a2; */
+ /** a1 = a1 * a2; */
+ if err = internal_mul(a1, a1, a2); err != nil { return err; }
+ /** \\S3 = S3 << 1; */
+ /** a1 = a1 << 1; */
+ if err = internal_shl(a1, a1, 1); err != nil { return err; }
+ /** \\S4 = a2^2; */
+ /** a2 = a2^2; */
+ if err = internal_sqr(a2, a2); err != nil { return err; }
+ /** \\ tmp = (S1 + S2)/2 */
+ /** \\tmp = S1 + S2; */
+ /** b = a0 + b; */
+ if err = internal_add(dest, a0, dest); err != nil { return err; }
+ /** \\tmp = tmp >> 1; */
+ /** b = b >> 1; */
+ if err = internal_shr(dest, dest, 1); err != nil { return err; }
+ /** \\ S1 = S1 - tmp - S3 */
+ /** \\S1 = S1 - tmp; */
+ /** a0 = a0 - b; */
+ if err = internal_sub(a0, a0, dest); err != nil { return err; }
+ /** \\S1 = S1 - S3; */
+ /** a0 = a0 - a1; */
+ if err = internal_sub(a0, a0, a1); err != nil { return err; }
+ /** \\S2 = tmp - S4 -S0 */
+ /** \\S2 = tmp - S4; */
+ /** b = b - a2; */
+ if err = internal_sub(dest, dest, a2); err != nil { return err; }
+ /** \\S2 = S2 - S0; */
+ /** b = b - S0; */
+ if err = internal_sub(dest, dest, S0); err != nil { return err; }
+ /** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */
+ /** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */
+ if err = internal_shl_digit( a2, 4 * B); err != nil { return err; }
+ if err = internal_shl_digit( a1, 3 * B); err != nil { return err; }
+ if err = internal_shl_digit(dest, 2 * B); err != nil { return err; }
+ if err = internal_shl_digit( a0, 1 * B); err != nil { return err; }
+
+ if err = internal_add(a2, a2, a1); err != nil { return err; }
+ if err = internal_add(dest, dest, a2); err != nil { return err; }
+ if err = internal_add(dest, dest, a0); err != nil { return err; }
+ if err = internal_add(dest, dest, S0); err != nil { return err; }
+ /** a^2 - P */
+
+ return #force_inline internal_clamp(dest);
}
/*