6 files changed, 316 insertions, 280 deletions
diff --git a/core/math/big/api.odin b/core/math/big/api.odin
index d2eb21fc5..047e91335 100644
--- a/core/math/big/api.odin
+++ b/core/math/big/api.odin
@@ -8,7 +8,7 @@ package big
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
 
-	This file collects public procs into proc maps, as well as any of their aliases.
+	This file collects public proc maps and their aliases.
 /*
 
 */
@@ -19,15 +19,18 @@ package big
 */
 
 /*
-	err = add(dest, a, b);
+	High-level addition. Handles sign.
 */
 add :: proc {
 	/*
-		int_add :: proc(dest, a, b: ^Int) -> (err: Error)
+		int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error)
 	*/
 	int_add,
 	/*
-		int_add_digit :: proc(dest, a: ^Int, digit: DIGIT) -> (err: Error)
+		Adds the unsigned `DIGIT` immediate to an `Int`, such that the
+		`DIGIT` doesn't have to be turned into an `Int` first.
+
+		int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error)
 	*/
 	int_add_digit,
 };
diff --git a/core/math/big/basic.odin b/core/math/big/basic.odin
index 86554748a..a06c275b8 100644
--- a/core/math/big/basic.odin
+++ b/core/math/big/basic.odin
@@ -24,6 +24,7 @@ import "core:intrinsics"
 	High-level addition. Handles sign.
 */
 int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	if dest == nil || a == nil || b == nil { return .Invalid_Pointer; }
 	if err = clear_if_uninitialized(dest, a, b); err != nil { return err; }
 	/*
 		All parameters have been initialized.
@@ -38,6 +39,7 @@ int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error
 	dest = a + digit;
 */
 int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	if dest == nil || a == nil { return .Invalid_Pointer; }
 	if err = clear_if_uninitialized(a); err != nil { return err; }
 	/*
 		Grow destination as required.
@@ -54,6 +56,7 @@ int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocato
 	High-level subtraction, dest = number - decrease. Handles signs.
 */
 int_sub :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	if dest == nil || number == nil || decrease == nil { return .Invalid_Pointer; }
 	if err = clear_if_uninitialized(dest, number, decrease); err != nil { return err; }
 	/*
 		All parameters have been initialized.
@@ -68,6 +71,7 @@ int_sub :: proc(dest, number, decrease: ^Int, allocator := context.allocator) ->
 	dest = a - digit;
 */
 int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	if dest == nil || a == nil { return .Invalid_Pointer; }
 	if err = clear_if_uninitialized(a); err != nil { return err; }
 	/*
 		Grow destination as required.
@@ -85,55 +89,14 @@ int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocato
 	dest = src >> 1
 */
 int_halve :: proc(dest, src: ^Int) -> (err: Error) {
-	dest := dest; src := src;
-	if err = clear_if_uninitialized(dest); err != nil {
-		return err;
-	}
+	if dest == nil || src == nil { return .Invalid_Pointer; }
+	if err = clear_if_uninitialized(dest, src); err != nil { return err; }
 	/*
 		Grow destination as required.
 	*/
-	if dest != src {
-		if err = grow(dest, src.used); err != nil {
-			return err;
-		}
-	}
-
-	old_used  := dest.used;
-	dest.used  = src.used;
-
-	/*
-		Carry
-	*/
-	fwd_carry := DIGIT(0);
-
-	for x := dest.used - 1; x >= 0; x -= 1 {
-		/*
-			Get the carry for the next iteration.
-		*/
-		src_digit := src.digit[x];
-		carry     := src_digit & 1;
-		/*
-			Shift the current digit, add in carry and store.
-		*/
-		dest.digit[x] = (src_digit >> 1) | (fwd_carry << (_DIGIT_BITS - 1));
-		/*
-			Forward carry to next iteration.
-		*/
-		fwd_carry = carry;
-	}
+	if dest != src { if err = grow(dest, src.used + 1); err != nil { return err; } }
 
-	zero_count := old_used - dest.used;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
-	}
-	/*
-		Adjust dest.used based on leading zeroes.
-	*/
-	dest.sign = src.sign;
-	return clamp(dest);
+	return #force_inline internal_int_shr1(dest, src);
 }
 halve :: proc { int_halve, };
 shr1  :: halve;
@@ -143,251 +106,36 @@ shr1  :: halve;
 	dest = src << 1
 */
 int_double :: proc(dest, src: ^Int) -> (err: Error) {
-	dest := dest; src := src;
-	if err = clear_if_uninitialized(dest); err != nil {
-		return err;
-	}
+	if dest == nil || src == nil { return .Invalid_Pointer; }
+	if err = clear_if_uninitialized(dest, src); err != nil { return err; }
 	/*
 		Grow destination as required.
 	*/
-	if dest != src {
-		if err = grow(dest, src.used + 1); err != nil {
-			return err;
-		}
-	}
-
-	old_used  := dest.used;
-	dest.used  = src.used + 1;
-
-	/*
-		Forward carry
-	*/
-	carry := DIGIT(0);
-	for x := 0; x < src.used; x += 1 {
-		/*
-			Get what will be the *next* carry bit from the MSB of the current digit.
-		*/
-		src_digit := src.digit[x];
-		fwd_carry := src_digit >> (_DIGIT_BITS - 1);
-
-		/*
-			Now shift up this digit, add in the carry [from the previous]
-		*/
-		dest.digit[x] = (src_digit << 1 | carry) & _MASK;
+	if dest != src { if err = grow(dest, src.used + 1); err != nil { return err; } }
 
-		/*
-			Update carry
-		*/
-		carry = fwd_carry;
-	}
-	/*
-		New leading digit?
-	*/
-	if carry != 0 {
-		/*
-			Add a MSB which is always 1 at this point.
-		*/
-		dest.digit[dest.used] = 1;
-	}
-	zero_count := old_used - dest.used;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
-	}
-	/*
-		Adjust dest.used based on leading zeroes.
-	*/
-	dest.sign = src.sign;
-	return clamp(dest);
+	return #force_inline internal_int_shl1(dest, src);
 }
 double :: proc { int_double, };
 shl1   :: double;
 
 /*
-	remainder = numerator % (1 << bits)
-*/
-int_mod_bits :: proc(remainder, numerator: ^Int, bits: int) -> (err: Error) {
-	if err = clear_if_uninitialized(remainder); err != nil { return err; }
-	if err = clear_if_uninitialized(numerator); err != nil { return err; }
-
-	if bits  < 0 { return .Invalid_Argument; }
-	if bits == 0 { return zero(remainder); }
-
-	/*
-		If the modulus is larger than the value, return the value.
-	*/
-	err = copy(remainder, numerator);
-	if bits >= (numerator.used * _DIGIT_BITS) || err != nil {
-		return;
-	}
-
-	/*
-		Zero digits above the last digit of the modulus.
-	*/
-	zero_count := (bits / _DIGIT_BITS) + 0 if (bits % _DIGIT_BITS == 0) else 1;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(remainder.digit[zero_count:]);
-	}
-
-	/*
-		Clear the digit that is not completely outside/inside the modulus.
-	*/
-	remainder.digit[bits / _DIGIT_BITS] &= DIGIT(1 << DIGIT(bits % _DIGIT_BITS)) - DIGIT(1);
-	return clamp(remainder);
-}
-mod_bits :: proc { int_mod_bits, };
-
-/*
 	Multiply by a DIGIT.
 */
-int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT) -> (err: Error) {
+int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT, allocator := context.allocator) -> (err: Error) {
+	if dest == nil || src == nil { return .Invalid_Pointer; }
 	if err = clear_if_uninitialized(src, dest); err != nil { return err; }
 
-	if multiplier == 0 {
-		return zero(dest);
-	}
-	if multiplier == 1 {
-		return copy(dest, src);
-	}
-
-	/*
-		Power of two?
-	*/
-	if multiplier == 2 {
-		return double(dest, src);
-	}
-	if is_power_of_two(int(multiplier)) {
-		ix: int;
-		if ix, err = log(multiplier, 2); err != nil { return err; }
-		return shl(dest, src, ix);
-	}
-
-	/*
-		Ensure `dest` is big enough to hold `src` * `multiplier`.
-	*/
-	if err = grow(dest, max(src.used + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; }
-
-	/*
-		Save the original used count.
-	*/
-	old_used := dest.used;
-	/*
-		Set the sign.
-	*/
-	dest.sign = src.sign;
-	/*
-		Set up carry.
-	*/
-	carry := _WORD(0);
-	/*
-		Compute columns.
-	*/
-	ix := 0;
-	for ; ix < src.used; ix += 1 {
-		/*
-			Compute product and carry sum for this term
-		*/
-		product := carry + _WORD(src.digit[ix]) * _WORD(multiplier);
-		/*
-			Mask off higher bits to get a single DIGIT.
-		*/
-		dest.digit[ix] = DIGIT(product & _WORD(_MASK));
-		/*
-			Send carry into next iteration
-		*/
-		carry = product >> _DIGIT_BITS;
-	}
-
-	/*
-		Store final carry [if any] and increment used.
-	*/
-	dest.digit[ix] = DIGIT(carry);
-	dest.used = src.used + 1;
-	/*
-		Zero unused digits.
-	*/
-	zero_count := old_used - dest.used;
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[zero_count:]);
-	}
-	return clamp(dest);
+	return #force_inline internal_int_mul_digit(dest, src, multiplier, allocator);
 }
 
 /*
 	High level multiplication (handles sign).
 */
-int_mul :: proc(dest, src, multiplier: ^Int) -> (err: Error) {
+int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.allocator) -> (err: Error) {
+	if dest == nil || src == nil || multiplier == nil { return .Invalid_Pointer; }
 	if err = clear_if_uninitialized(dest, src, multiplier); err != nil { return err; }
 
-	/*
-		Early out for `multiplier` is zero; Set `dest` to zero.
-	*/
-	if z, _ := is_zero(multiplier); z { return zero(dest); }
-	if z, _ := is_zero(src);        z { return zero(dest); }
-
-	if src == multiplier {
-		/*
-			Do we need to square?
-		*/
-		if        false && src.used >= _SQR_TOOM_CUTOFF {
-			/* Use Toom-Cook? */
-			// err = s_mp_sqr_toom(a, c);
-		} else if false && src.used >= _SQR_KARATSUBA_CUTOFF {
-			/* Karatsuba? */
-			// err = s_mp_sqr_karatsuba(a, c);
-		} else if false && ((src.used * 2) + 1) < _WARRAY &&
-		                   src.used < (_MAX_COMBA / 2) {
-			/* Fast comba? */
-			// err = s_mp_sqr_comba(a, c);
-		} else {
-			err = _int_sqr(dest, src);
-		}
-	} else {
-		/*
-			Can we use the balance method? Check sizes.
-			* The smaller one needs to be larger than the Karatsuba cut-off.
-			* The bigger one needs to be at least about one `_MUL_KARATSUBA_CUTOFF` bigger
-			* to make some sense, but it depends on architecture, OS, position of the
-			* stars... so YMMV.
-			* Using it to cut the input into slices small enough for _mul_comba
-			* was actually slower on the author's machine, but YMMV.
-		*/
-
-		min_used := min(src.used, multiplier.used);
-		max_used := max(src.used, multiplier.used);
-		digits   := src.used + multiplier.used + 1;
-
-		if        false &&  min_used     >= _MUL_KARATSUBA_CUTOFF &&
-						    max_used / 2 >= _MUL_KARATSUBA_CUTOFF &&
-			/*
-				Not much effect was observed below a ratio of 1:2, but again: YMMV.
-			*/
-							max_used     >= 2 * min_used {
-			// err = s_mp_mul_balance(a,b,c);
-		} else if false && min_used >= _MUL_TOOM_CUTOFF {
-			// err = s_mp_mul_toom(a, b, c);
-		} else if false && min_used >= _MUL_KARATSUBA_CUTOFF {
-			// err = s_mp_mul_karatsuba(a, b, c);
-		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
-			/*
-				Can we use the fast multiplier?
-				* The fast multiplier can be used if the output will
-				* have less than MP_WARRAY digits and the number of
-				* digits won't affect carry propagation
-			*/
-			err = _int_mul_comba(dest, src, multiplier, digits);
-		} else {
-			err = _int_mul(dest, src, multiplier, digits);
-		}
-	}
-	neg      := src.sign != multiplier.sign;
-	dest.sign = .Negative if dest.used > 0 && neg else .Zero_or_Positive;
-	return err;
+	return #force_inline internal_int_mul(dest, src, multiplier, allocator);
 }
 
 mul :: proc { int_mul, int_mul_digit, };
@@ -1219,6 +967,7 @@ _int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remain
 	Function computing both GCD and (if target isn't `nil`) also LCM.
 */
 int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int) -> (err: Error) {
+	if res_gcd == nil && res_lcm == nil { return nil; }
 	if err = clear_if_uninitialized(res_gcd, res_lcm, a, b); err != nil { return err; }
 
 	az, _ := is_zero(a); bz, _ := is_zero(b);
@@ -1435,6 +1184,43 @@ _int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int) -> (err: Error) {
 	return err;
 }
 
+/*
+	remainder = numerator % (1 << bits)
+*/
+int_mod_bits :: proc(remainder, numerator: ^Int, bits: int) -> (err: Error) {
+	if err = clear_if_uninitialized(remainder); err != nil { return err; }
+	if err = clear_if_uninitialized(numerator); err != nil { return err; }
+
+	if bits  < 0 { return .Invalid_Argument; }
+	if bits == 0 { return zero(remainder); }
+
+	/*
+		If the modulus is larger than the value, return the value.
+	*/
+	err = copy(remainder, numerator);
+	if bits >= (numerator.used * _DIGIT_BITS) || err != nil {
+		return;
+	}
+
+	/*
+		Zero digits above the last digit of the modulus.
+	*/
+	zero_count := (bits / _DIGIT_BITS) + 0 if (bits % _DIGIT_BITS == 0) else 1;
+	/*
+		Zero remainder.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(remainder.digit[zero_count:]);
+	}
+
+	/*
+		Clear the digit that is not completely outside/inside the modulus.
+	*/
+	remainder.digit[bits / _DIGIT_BITS] &= DIGIT(1 << DIGIT(bits % _DIGIT_BITS)) - DIGIT(1);
+	return clamp(remainder);
+}
+mod_bits :: proc { int_mod_bits, };
+
 
 when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
 	_factorial_table := [35]_WORD{
diff --git a/core/math/big/helpers.odin b/core/math/big/helpers.odin
index 054b05abd..eacd15283 100644
--- a/core/math/big/helpers.odin
+++ b/core/math/big/helpers.odin
@@ -287,7 +287,7 @@ int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := conte
 
 	/*
 		If not yet iniialized, initialize the `digit` backing with the allocator we were passed.
-		Otherwise, `[dynamic]DIGIT` already knows what allocator was used for it, so reuse will do the right thing.
+		Otherwise, `[dynamic]DIGIT` already knows what allocator was used for it, so resize will do the right thing.
 	*/
 	if raw.cap == 0 {
 		a.digit = mem.make_dynamic_array_len_cap([dynamic]DIGIT, needed, needed, allocator);
@@ -314,7 +314,7 @@ int_clear :: proc(a: ^Int, minimize := false, allocator := context.allocator) ->
 
 	raw := transmute(mem.Raw_Dynamic_Array)a.digit;
 	if raw.cap != 0 {
-		mem.zero_slice(a.digit[:]);
+		mem.zero_slice(a.digit[:a.used]);
 	}
 	a.sign = .Zero_or_Positive;
 	a.used = 0;
@@ -328,7 +328,7 @@ zero  :: clear;
 	Set the `Int` to 1 and optionally shrink it to the minimum backing size.
 */
 int_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) {
-	return set(a, 1, minimize, allocator);
+	return copy(a, ONE, minimize, allocator);
 }
 one :: proc { int_one, };
 
@@ -597,6 +597,7 @@ _zero_unused :: proc(a: ^Int) {
 
 clear_if_uninitialized_single :: proc(arg: ^Int) -> (err: Error) {
 	if !is_initialized(arg) {
+		if arg == nil { return nil; }
 		return grow(arg, _DEFAULT_DIGIT_COUNT);
 	}
 	return err;
@@ -604,7 +605,8 @@ clear_if_uninitialized_single :: proc(arg: ^Int) -> (err: Error) {
 
 clear_if_uninitialized_multi :: proc(args: ..^Int) -> (err: Error) {
 	for i in args {
-		if i != nil && !is_initialized(i) {
+		if i == nil { continue; }
+		if !is_initialized(i) {
 			e := grow(i, _DEFAULT_DIGIT_COUNT);
 			if e != nil { err = e; }
 		}
@@ -690,9 +692,9 @@ initialize_constants :: proc() -> (res: int) {
 		We set these special values to -1 or 1 so they don't get mistake for zero accidentally.
 		This allows for shortcut tests of is_zero as .used == 0.
 	*/
+	set(      NAN,  1);       NAN.flags = {.Immutable, .NaN};
 	set(      INF,  1);       INF.flags = {.Immutable, .Inf};
 	set(      INF, -1); MINUS_INF.flags = {.Immutable, .Inf};
-	set(      NAN,  1);       NAN.flags = {.Immutable, .NaN};
 
 	return #config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF);
 }
diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin
index 7f9b2f7fe..df0acbf43 100644
--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -443,4 +443,241 @@ internal_int_sub_digit :: proc(dest, number: ^Int, digit: DIGIT) -> (err: Error)
 	return clamp(dest);
 }
 
-internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, };
-\ No newline at end of file
+internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, };
+
+/*
+	dest = src  / 2
+	dest = src >> 1
+*/
+internal_int_shr1 :: proc(dest, src: ^Int) -> (err: Error) {
+	old_used  := dest.used; dest.used = src.used;
+	/*
+		Carry
+	*/
+	fwd_carry := DIGIT(0);
+
+	#no_bounds_check for x := dest.used - 1; x >= 0; x -= 1 {
+		/*
+			Get the carry for the next iteration.
+		*/
+		src_digit := src.digit[x];
+		carry     := src_digit & 1;
+		/*
+			Shift the current digit, add in carry and store.
+		*/
+		dest.digit[x] = (src_digit >> 1) | (fwd_carry << (_DIGIT_BITS - 1));
+		/*
+			Forward carry to next iteration.
+		*/
+		fwd_carry = carry;
+	}
+
+	zero_count := old_used - dest.used;
+	/*
+		Zero remainder.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	dest.sign = src.sign;
+	return clamp(dest);	
+}
+
+/*
+	dest = src  * 2
+	dest = src << 1
+*/
+internal_int_shl1 :: proc(dest, src: ^Int) -> (err: Error) {
+	old_used  := dest.used; dest.used  = src.used + 1;
+
+	/*
+		Forward carry
+	*/
+	carry := DIGIT(0);
+	#no_bounds_check for x := 0; x < src.used; x += 1 {
+		/*
+			Get what will be the *next* carry bit from the MSB of the current digit.
+		*/
+		src_digit := src.digit[x];
+		fwd_carry := src_digit >> (_DIGIT_BITS - 1);
+
+		/*
+			Now shift up this digit, add in the carry [from the previous]
+		*/
+		dest.digit[x] = (src_digit << 1 | carry) & _MASK;
+
+		/*
+			Update carry
+		*/
+		carry = fwd_carry;
+	}
+	/*
+		New leading digit?
+	*/
+	if carry != 0 {
+		/*
+			Add a MSB which is always 1 at this point.
+		*/
+		dest.digit[dest.used] = 1;
+	}
+	zero_count := old_used - dest.used;
+	/*
+		Zero remainder.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	dest.sign = src.sign;
+	return clamp(dest);
+}
+
+/*
+	Multiply by a DIGIT.
+*/
+internal_int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT, allocator := context.allocator) -> (err: Error) {
+	if multiplier == 0 {
+		return zero(dest);
+	}
+	if multiplier == 1 {
+		return copy(dest, src);
+	}
+
+	/*
+		Power of two?
+	*/
+	if multiplier == 2 {
+		return #force_inline shl1(dest, src);
+	}
+	if is_power_of_two(int(multiplier)) {
+		ix: int;
+		if ix, err = log(multiplier, 2); err != nil { return err; }
+		return shl(dest, src, ix);
+	}
+
+	/*
+		Ensure `dest` is big enough to hold `src` * `multiplier`.
+	*/
+	if err = grow(dest, max(src.used + 1, _DEFAULT_DIGIT_COUNT), false, allocator); err != nil { return err; }
+
+	/*
+		Save the original used count.
+	*/
+	old_used := dest.used;
+	/*
+		Set the sign.
+	*/
+	dest.sign = src.sign;
+	/*
+		Set up carry.
+	*/
+	carry := _WORD(0);
+	/*
+		Compute columns.
+	*/
+	ix := 0;
+	#no_bounds_check for ; ix < src.used; ix += 1 {
+		/*
+			Compute product and carry sum for this term
+		*/
+		product := carry + _WORD(src.digit[ix]) * _WORD(multiplier);
+		/*
+			Mask off higher bits to get a single DIGIT.
+		*/
+		dest.digit[ix] = DIGIT(product & _WORD(_MASK));
+		/*
+			Send carry into next iteration
+		*/
+		carry = product >> _DIGIT_BITS;
+	}
+
+	/*
+		Store final carry [if any] and increment used.
+	*/
+	dest.digit[ix] = DIGIT(carry);
+	dest.used = src.used + 1;
+	/*
+		Zero unused digits.
+	*/
+	zero_count := old_used - dest.used;
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[zero_count:]);
+	}
+	return clamp(dest);
+}
+
+/*
+	High level multiplication (handles sign).
+*/
+internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Early out for `multiplier` is zero; Set `dest` to zero.
+	*/
+	if multiplier.used == 0 || src.used == 0 { return zero(dest); }
+
+	if src == multiplier {
+		/*
+			Do we need to square?
+		*/
+		if        false && src.used >= _SQR_TOOM_CUTOFF {
+			/* Use Toom-Cook? */
+			// err = s_mp_sqr_toom(a, c);
+		} else if false && src.used >= _SQR_KARATSUBA_CUTOFF {
+			/* Karatsuba? */
+			// err = s_mp_sqr_karatsuba(a, c);
+		} else if false && ((src.used * 2) + 1) < _WARRAY &&
+		                   src.used < (_MAX_COMBA / 2) {
+			/* Fast comba? */
+			// err = s_mp_sqr_comba(a, c);
+		} else {
+			err = _int_sqr(dest, src);
+		}
+	} else {
+		/*
+			Can we use the balance method? Check sizes.
+			* The smaller one needs to be larger than the Karatsuba cut-off.
+			* The bigger one needs to be at least about one `_MUL_KARATSUBA_CUTOFF` bigger
+			* to make some sense, but it depends on architecture, OS, position of the
+			* stars... so YMMV.
+			* Using it to cut the input into slices small enough for _mul_comba
+			* was actually slower on the author's machine, but YMMV.
+		*/
+
+		min_used := min(src.used, multiplier.used);
+		max_used := max(src.used, multiplier.used);
+		digits   := src.used + multiplier.used + 1;
+
+		if        false &&  min_used     >= _MUL_KARATSUBA_CUTOFF &&
+						    max_used / 2 >= _MUL_KARATSUBA_CUTOFF &&
+			/*
+				Not much effect was observed below a ratio of 1:2, but again: YMMV.
+			*/
+							max_used     >= 2 * min_used {
+			// err = s_mp_mul_balance(a,b,c);
+		} else if false && min_used >= _MUL_TOOM_CUTOFF {
+			// err = s_mp_mul_toom(a, b, c);
+		} else if false && min_used >= _MUL_KARATSUBA_CUTOFF {
+			// err = s_mp_mul_karatsuba(a, b, c);
+		} else if digits < _WARRAY && min_used <= _MAX_COMBA {
+			/*
+				Can we use the fast multiplier?
+				* The fast multiplier can be used if the output will
+				* have less than MP_WARRAY digits and the number of
+				* digits won't affect carry propagation
+			*/
+			err = _int_mul_comba(dest, src, multiplier, digits);
+		} else {
+			err = _int_mul(dest, src, multiplier, digits);
+		}
+	}
+	neg      := src.sign != multiplier.sign;
+	dest.sign = .Negative if dest.used > 0 && neg else .Zero_or_Positive;
+	return err;
+}
+
+internal_mul :: proc { internal_int_mul, internal_int_mul_digit, };
+\ No newline at end of file
diff --git a/core/math/big/test.odin b/core/math/big/test.odin
index 19185da77..ae105969c 100644
--- a/core/math/big/test.odin
+++ b/core/math/big/test.odin
@@ -24,6 +24,11 @@ PyRes :: struct {
 	err: Error,
 }
 
+@export test_initialize_constants :: proc "c" () -> (res: int) {
+	context = runtime.default_context();
+	return initialize_constants();
+}
+
 @export test_error_string :: proc "c" (err: Error) -> (res: cstring) {
 	context = runtime.default_context();
 	es := Error_String;
diff --git a/core/math/big/test.py b/core/math/big/test.py
index f7e341ce7..836f03353 100644
--- a/core/math/big/test.py
+++ b/core/math/big/test.py
@@ -113,6 +113,9 @@ def load(export_name, args, res):
 class Res(Structure):
 	_fields_ = [("res", c_char_p), ("err", c_uint64)]
 
+initialize_constants = load(l.test_initialize_constants, [], c_uint64)
+initialize_constants()
+
 error_string = load(l.test_error_string, [c_byte], c_char_p)
 
 add  = load(l.test_add, [c_char_p, c_char_p], Res)