From b56a0e0f0346237a3254045ee68e539cfcb11be1 Mon Sep 17 00:00:00 2001
From: gingerBill <bill@gingerbill.org>
Date: Sun, 2 Jun 2024 23:29:43 +0100
Subject: Remove `libm` dependency in `core:math` where possible

---
 core/math/math.odin          |  32 ++++++-
 core/math/math_basic.odin    | 200 ---------------------------------------
 core/math/math_basic_js.odin |  47 ----------
 core/math/math_fmuladd.odin  | 218 +++++++++++++++++++++++++++++++++++++++++++
 core/math/math_ln.odin       | 118 +++++++++++++++++++++++
 core/math/math_pow.odin      | 210 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 575 insertions(+), 250 deletions(-)
 delete mode 100644 core/math/math_basic.odin
 delete mode 100644 core/math/math_basic_js.odin
 create mode 100644 core/math/math_fmuladd.odin
 create mode 100644 core/math/math_ln.odin
 create mode 100644 core/math/math_pow.odin

diff --git a/core/math/math.odin b/core/math/math.odin
index 8d85c2381..534ef35f7 100644
--- a/core/math/math.odin
+++ b/core/math/math.odin
@@ -14,10 +14,10 @@ Float_Class :: enum {
 	Neg_Inf,   // negative infinity
 }
 
-TAU          :: 6.28318530717958647692528676655900576
-PI           :: 3.14159265358979323846264338327950288
+TAU :: 6.28318530717958647692528676655900576
+PI  :: 3.14159265358979323846264338327950288
 
-E            :: 2.71828182845904523536
+E   :: 2.71828182845904523536
 
 τ :: TAU
 π :: PI
@@ -42,6 +42,32 @@ min :: builtin.min
 max :: builtin.max
 clamp :: builtin.clamp
 
+
+@(private)
+IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
+
+@(require_results)
+sqrt_f16 :: proc "contextless" (x: f16) -> f16 {
+	when IS_WASM {
+		return f16(sqrt_f64(f64(x)))
+	} else {
+		return intrinsics.sqrt(x)
+	}
+}
+@(require_results)
+sqrt_f32 :: proc "contextless" (x: f32) -> f32 {
+	when IS_WASM {
+		return f32(sqrt_f64(f64(x)))
+	} else {
+		return intrinsics.sqrt(x)
+	}
+}
+@(require_results)
+sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
+	return intrinsics.sqrt(x)
+}
+
+
 @(require_results) sqrt_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(sqrt_f16(f16(x))) }
 @(require_results) sqrt_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(sqrt_f16(f16(x))) }
 @(require_results) sqrt_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(sqrt_f32(f32(x))) }
diff --git a/core/math/math_basic.odin b/core/math/math_basic.odin
deleted file mode 100644
index 2c73232cc..000000000
--- a/core/math/math_basic.odin
+++ /dev/null
@@ -1,200 +0,0 @@
-//+build !js
-package math
-
-import "base:intrinsics"
-
-@(default_calling_convention="none", private="file")
-foreign _ {
-	@(link_name="llvm.pow.f16", require_results)
-	_pow_f16 :: proc(x, power: f16) -> f16 ---
-	@(link_name="llvm.pow.f32", require_results)
-	_pow_f32 :: proc(x, power: f32) -> f32 ---
-	@(link_name="llvm.pow.f64", require_results)
-	_pow_f64 :: proc(x, power: f64) -> f64 ---
-
-	@(link_name="llvm.fmuladd.f16", require_results)
-	_fmuladd_f16 :: proc(a, b, c: f16) -> f16 ---
-	@(link_name="llvm.fmuladd.f32", require_results)
-	_fmuladd_f32 :: proc(a, b, c: f32) -> f32 ---
-	@(link_name="llvm.fmuladd.f64", require_results)
-	_fmuladd_f64 :: proc(a, b, c: f64) -> f64 ---
-
-	@(link_name="llvm.exp.f16", require_results)
-	_exp_f16 :: proc(x: f16) -> f16 ---
-	@(link_name="llvm.exp.f32", require_results)
-	_exp_f32 :: proc(x: f32) -> f32 ---
-	@(link_name="llvm.exp.f64", require_results)
-	_exp_f64 :: proc(x: f64) -> f64 ---
-}
-
-
-@(require_results)
-pow_f16 :: proc "contextless" (x, power: f16) -> f16 {
-	return _pow_f16(x, power)
-}
-@(require_results)
-pow_f32 :: proc "contextless" (x, power: f32) -> f32 {
-	return _pow_f32(x, power)
-}
-@(require_results)
-pow_f64 :: proc "contextless" (x, power: f64) -> f64 {
-	return _pow_f64(x, power)
-}
-
-@(require_results)
-fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 {
-	return _fmuladd_f16(a, b, c)
-}
-@(require_results)
-fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 {
-	return _fmuladd_f32(a, b, c)
-}
-@(require_results)
-fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 {
-	return _fmuladd_f64(a, b, c)
-}
-
-@(require_results)
-exp_f16 :: proc "contextless" (x: f16) -> f16 {
-	return _exp_f16(x)
-}
-@(require_results)
-exp_f32 :: proc "contextless" (x: f32) -> f32 {
-	return _exp_f32(x)
-}
-@(require_results)
-exp_f64 :: proc "contextless" (x: f64) -> f64 {
-	return _exp_f64(x)
-}
-
-
-@(require_results)
-sqrt_f16 :: proc "contextless" (x: f16) -> f16 {
-	return intrinsics.sqrt(x)
-}
-@(require_results)
-sqrt_f32 :: proc "contextless" (x: f32) -> f32 {
-	return intrinsics.sqrt(x)
-}
-@(require_results)
-sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
-	return intrinsics.sqrt(x)
-}
-
-
-
-@(require_results)
-ln_f64 :: proc "contextless" (x: f64) -> f64 {
-	// The original C code, the long comment, and the constants
-	// below are from FreeBSD's /usr/src/lib/msun/src/e_log.c
-	// and came with this notice.
-	//
-	// ====================================================
-	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-	//
-	// Developed at SunPro, a Sun Microsystems, Inc. business.
-	// Permission to use, copy, modify, and distribute this
-	// software is freely granted, provided that this notice
-	// is preserved.
-	// ====================================================
-	//
-	// __ieee754_log(x)
-	// Return the logarithm of x
-	//
-	// Method :
-	//   1. Argument Reduction: find k and f such that
-	//			x = 2**k * (1+f),
-	//	   where  sqrt(2)/2 < 1+f < sqrt(2) .
-	//
-	//   2. Approximation of log(1+f).
-	//	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
-	//		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
-	//	     	 = 2s + s*R
-	//      We use a special Reme algorithm on [0,0.1716] to generate
-	//	a polynomial of degree 14 to approximate R.  The maximum error
-	//	of this polynomial approximation is bounded by 2**-58.45. In
-	//	other words,
-	//		        2      4      6      8      10      12      14
-	//	    R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s  +L6*s  +L7*s
-	//	(the values of L1 to L7 are listed in the program) and
-	//	    |      2          14          |     -58.45
-	//	    | L1*s +...+L7*s    -  R(z) | <= 2
-	//	    |                             |
-	//	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
-	//	In order to guarantee error in log below 1ulp, we compute log by
-	//		log(1+f) = f - s*(f - R)		(if f is not too large)
-	//		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
-	//
-	//	3. Finally,  log(x) = k*Ln2 + log(1+f).
-	//			    = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
-	//	   Here Ln2 is split into two floating point number:
-	//			Ln2_hi + Ln2_lo,
-	//	   where n*Ln2_hi is always exact for |n| < 2000.
-	//
-	// Special cases:
-	//	log(x) is NaN with signal if x < 0 (including -INF) ;
-	//	log(+INF) is +INF; log(0) is -INF with signal;
-	//	log(NaN) is that NaN with no signal.
-	//
-	// Accuracy:
-	//	according to an error analysis, the error is always less than
-	//	1 ulp (unit in the last place).
-	//
-	// Constants:
-	// The hexadecimal values are the intended ones for the following
-	// constants. The decimal values may be used, provided that the
-	// compiler will convert from decimal to binary accurately enough
-	// to produce the hexadecimal values shown.
-	
-	LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01
-	LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10
-	L1     :: 0h3fe55555_55555593 // 6.666666666666735130e-01
-	L2     :: 0h3fd99999_9997fa04 // 3.999999999940941908e-01
-	L3     :: 0h3fd24924_94229359 // 2.857142874366239149e-01
-	L4     :: 0h3fcc71c5_1d8e78af // 2.222219843214978396e-01
-	L5     :: 0h3fc74664_96cb03de // 1.818357216161805012e-01
-	L6     :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01
-	L7     :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01
-	
-	switch {
-	case is_nan(x) || is_inf(x, 1):
-		return x
-	case x < 0:
-		return nan_f64()
-	case x == 0:
-		return inf_f64(-1)
-	}
-
-	// reduce
-	f1, ki := frexp(x)
-	if f1 < SQRT_TWO/2 {
-		f1 *= 2
-		ki -= 1
-	}
-	f := f1 - 1
-	k := f64(ki)
-
-	// compute
-	s := f / (2 + f)
-	s2 := s * s
-	s4 := s2 * s2
-	t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7)))
-	t2 := s4 * (L2 + s4*(L4+s4*L6))
-	R := t1 + t2
-	hfsq := 0.5 * f * f
-	return k*LN2_HI - ((hfsq - (s*(hfsq+R) + k*LN2_LO)) - f)
-}
-
-@(require_results) ln_f16   :: proc "contextless" (x: f16)   -> f16   { return #force_inline f16(ln_f64(f64(x))) }
-@(require_results) ln_f32   :: proc "contextless" (x: f32)   -> f32   { return #force_inline f32(ln_f64(f64(x))) }
-@(require_results) ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) }
-@(require_results) ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) }
-@(require_results) ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) }
-@(require_results) ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) }
-@(require_results) ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) }
-@(require_results) ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) }
-ln :: proc{
-	ln_f16, ln_f16le, ln_f16be,
-	ln_f32, ln_f32le, ln_f32be,
-	ln_f64, ln_f64le, ln_f64be,
-}
\ No newline at end of file
diff --git a/core/math/math_basic_js.odin b/core/math/math_basic_js.odin
deleted file mode 100644
index df68da1b8..000000000
--- a/core/math/math_basic_js.odin
+++ /dev/null
@@ -1,47 +0,0 @@
-//+build js
-package math
-
-import "base:intrinsics"
-
-foreign import "odin_env"
-
-@(default_calling_convention="c")
-foreign odin_env {
-	@(link_name="pow", require_results)
-	pow_f64 :: proc(x, power: f64) -> f64 ---
-	@(link_name="fmuladd", require_results)
-	fmuladd_f64 :: proc(a, b, c: f64) -> f64 ---
-	@(link_name="ln", require_results)
-	ln_f64 :: proc(x: f64) -> f64 ---
-	@(link_name="exp", require_results)
-	exp_f64 :: proc(x: f64) -> f64 ---
-}
-
-@(require_results)
-sqrt_f64 :: proc "contextless" (x: f64) -> f64 {
-	return intrinsics.sqrt(x)
-}
-
-@(require_results) sqrt_f16    :: proc "c" (x: f16) -> f16             { return f16(sqrt_f64(f64(x)))                    }
-@(require_results) pow_f16     :: proc "c" (x, power: f16) -> f16      { return f16(pow_f64(f64(x), f64(power)))         }
-@(require_results) fmuladd_f16 :: proc "c" (a, b, c: f16) -> f16       { return f16(fmuladd_f64(f64(a), f64(a), f64(c))) }
-@(require_results) ln_f16      :: proc "c" (x: f16) -> f16             { return f16(ln_f64(f64(x)))                      }
-@(require_results) exp_f16     :: proc "c" (x: f16) -> f16             { return f16(exp_f64(f64(x)))                     }
-
-@(require_results) sqrt_f32    :: proc "c" (x: f32) -> f32             { return f32(sqrt_f64(f64(x)))                    }
-@(require_results) pow_f32     :: proc "c" (x, power: f32) -> f32      { return f32(pow_f64(f64(x), f64(power)))         }
-@(require_results) fmuladd_f32 :: proc "c" (a, b, c: f32) -> f32       { return f32(fmuladd_f64(f64(a), f64(a), f64(c))) }
-@(require_results) ln_f32      :: proc "c" (x: f32) -> f32             { return f32(ln_f64(f64(x)))                      }
-@(require_results) exp_f32     :: proc "c" (x: f32) -> f32             { return f32(exp_f64(f64(x)))                     }
-
-@(require_results) ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) }
-@(require_results) ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) }
-@(require_results) ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) }
-@(require_results) ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) }
-@(require_results) ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) }
-@(require_results) ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) }
-ln :: proc{
-	ln_f16, ln_f16le, ln_f16be,
-	ln_f32, ln_f32le, ln_f32be,
-	ln_f64, ln_f64le, ln_f64be,
-}
diff --git a/core/math/math_fmuladd.odin b/core/math/math_fmuladd.odin
new file mode 100644
index 000000000..632502454
--- /dev/null
+++ b/core/math/math_fmuladd.odin
@@ -0,0 +1,218 @@
+package math
+
+import "base:intrinsics"
+_ :: intrinsics
+
+@(require_results)
+fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 {
+	when IS_WASM {
+		return f16(fmuladd_f64(f64(a), f64(b), f64(c)))
+	} else {
+		foreign _ {
+			@(link_name="llvm.fmuladd.f16", require_results)
+			_fmuladd_f16 :: proc "none" (a, b, c: f16) -> f16 ---
+		}
+
+		return _fmuladd_f16(a, b, c)
+	}
+}
+@(require_results)
+fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 {
+	when IS_WASM {
+		return f32(fmuladd_f64(f64(a), f64(b), f64(c)))
+	} else {
+		foreign _ {
+			@(link_name="llvm.fmuladd.f32", require_results)
+			_fmuladd_f32 :: proc "none" (a, b, c: f32) -> f32 ---
+		}
+
+		return _fmuladd_f32(a, b, c)
+	}
+}
+@(require_results)
+fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 {
+	when IS_WASM {
+		return #force_inline fmuladd_slow_f64(a, b, c)
+	} else {
+		foreign _ {
+			@(link_name="llvm.fmuladd.f64", require_results)
+			_fmuladd_f64 :: proc "none" (a, b, c: f64) -> f64 ---
+		}
+
+		return _fmuladd_f64(a, b, c)
+	}
+}
+
+
+@(require_results)
+fmuladd_slow_f64 :: proc "contextless" (x, y, z: f64) -> f64 {
+	@(require_results)
+	split :: proc "contextless" (b: u64) -> (sign: u32, exp: i32, mantissa: u64) {
+		MASK  :: 0x7FF
+		FRAC_MASK :: 1<<52 - 1
+
+		sign = u32(b >> 63)
+		exp = i32(b>>52) & MASK
+		mantissa = b & FRAC_MASK
+
+		if exp == 0 {
+			shift := uint(intrinsics.count_leading_zeros(mantissa) - 11)
+			mantissa <<= shift
+			exp = 1 - i32(shift)
+		} else {
+			mantissa |= 1<<52
+		}
+		return
+	}
+
+	@(require_results)
+	mul_u64 :: proc "contextless" (x, y: u64) -> (hi, lo: u64) {
+		prod_wide := u128(x) * u128(y)
+		hi, lo = u64(prod_wide>>64), u64(prod_wide)
+		return
+	}
+
+	@(require_results)
+	add_u64 :: proc "contextless" (x, y, carry: u64) -> (sum, carry_out: u64) {
+		tmp_carry, tmp_carry2: bool
+		sum, tmp_carry = intrinsics.overflow_add(x, y)
+		sum, tmp_carry2 = intrinsics.overflow_add(sum, carry)
+		carry_out = u64(tmp_carry | tmp_carry2)
+		return
+	}
+
+	@(require_results)
+	sub_u64 :: proc "contextless" (x, y, borrow: u64) -> (diff, borrow_out: u64) {
+		tmp_borrow, tmp_borrow2: bool
+		diff, tmp_borrow = intrinsics.overflow_sub(x, y)
+		diff, tmp_borrow2 = intrinsics.overflow_sub(diff, borrow)
+		borrow_out = u64(tmp_borrow | tmp_borrow2)
+		return
+	}
+
+	@(require_results)
+	nonzero :: proc "contextless" (x: u64) -> u64 {
+		return 1 if x != 0 else 0
+	}
+
+	@(require_results)
+	zero :: proc "contextless" (x: u64) -> u64 {
+		return 1 if x == 0 else 0
+	}
+
+	@(require_results)
+	shl :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) {
+		r1 = u1<<n | u2>>(64-n) | u2<<(n-64)
+		r2 = u2<<n
+		return
+	}
+	@(require_results)
+	shr :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) {
+		r2 = u2>>n | u1<<(64-n) | u1>>(n-64)
+		r1 = u1>>n
+		return
+	}
+
+	@(require_results)
+	lz :: proc "contextless" (u1, u2: u64) -> (l: i32) {
+		l = i32(intrinsics.count_leading_zeros(u1))
+		if l == 64 {
+			l += i32(intrinsics.count_leading_zeros(u2))
+		}
+		return l
+	}
+
+	@(require_results)
+	shrcompress :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) {
+		switch {
+		case n == 0:
+			return u1, u2
+		case n == 64:
+			return 0, u1 | nonzero(u2)
+		case n >= 128:
+			return 0, nonzero(u1 | u2)
+		case n < 64:
+			r1, r2 = shr(u1, u2, n)
+			r2 |= nonzero(u2 & (1<<n - 1))
+		case n < 128:
+			r1, r2 = shr(u1, u2, n)
+			r2 |= nonzero(u1&(1<<(n-64)-1) | u2)
+		}
+		return
+	}
+
+
+
+
+	UVINF :: 0x7ff0_0000_0000_0000
+	BIAS :: 1023
+
+	bx, by, bz := transmute(u64)x, transmute(u64)y, transmute(u64)z
+
+	switch {
+	case x == 0, y == 0, z == 0,
+	     bx&UVINF == UVINF, by&UVINF == UVINF:
+	     return x*y + z
+	}
+
+	if bz&UVINF == UVINF {
+		return z
+	}
+
+	xs, xe, xm := split(bx)
+	ys, ye, ym := split(by)
+	zs, ze, zm := split(bz)
+
+	pe := xe + ye - BIAS + 1
+
+	pm1, pm2 := mul_u64(xm<<10, ym<<11)
+	zm1, zm2 := zm<<10, u64(0)
+	ps := xs ~ ys // product sign
+
+	is_62_zero := uint((~pm1 >> 62) & 1)
+	pm1, pm2 = shl(pm1, pm2, is_62_zero)
+	pe -= i32(is_62_zero)
+
+	if pe < ze || pe == ze && pm1 < zm1 {
+		// Swap addition operands so |p| >= |z|
+		ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2
+	}
+
+	if ps != zs && pe == ze && pm1 == zm1 && pm2 == zm2 {
+		return 0
+	}
+
+	zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze))
+
+	// Compute resulting significands, normalizing if necessary.
+	m, c: u64
+	if ps == zs {
+		// Adding (pm1:pm2) + (zm1:zm2)
+		pm2, c = add_u64(pm2, zm2, 0)
+		pm1, _ = add_u64(pm1, zm1, c)
+		pe -= i32(~pm1 >> 63)
+		pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63))
+	} else {
+		// Subtracting (pm1:pm2) - (zm1:zm2)
+		pm2, c = sub_u64(pm2, zm2, 0)
+		pm1, _ = sub_u64(pm1, zm1, c)
+		nz := lz(pm1, pm2)
+		pe -= nz
+		m, pm2 = shl(pm1, pm2, uint(nz-1))
+		m |= nonzero(pm2)
+	}
+
+	// Round and break ties to even
+	if pe > 1022+BIAS || pe == 1022+BIAS && (m+1<<9)>>63 == 1 {
+		// rounded value overflows exponent range
+		return transmute(f64)(u64(ps)<<63 | UVINF)
+	}
+	if pe < 0 {
+		n := uint(-pe)
+		m = m>>n | nonzero(m&(1<<n-1))
+		pe = 0
+	}
+	m = ((m + 1<<9) >> 10) & ~zero((m&(1<<10-1))~1<<9)
+	pe &= -i32(nonzero(m))
+	return transmute(f64)(u64(ps)<<63 + u64(pe)<<52 + m)
+}
\ No newline at end of file
diff --git a/core/math/math_ln.odin b/core/math/math_ln.odin
new file mode 100644
index 000000000..ad1016692
--- /dev/null
+++ b/core/math/math_ln.odin
@@ -0,0 +1,118 @@
+package math
+
+
+@(require_results)
+ln_f64 :: proc "contextless" (x: f64) -> f64 {
+	// The original C code, the long comment, and the constants
+	// below are from FreeBSD's /usr/src/lib/msun/src/e_log.c
+	// and came with this notice.
+	//
+	// ====================================================
+	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+	//
+	// Developed at SunPro, a Sun Microsystems, Inc. business.
+	// Permission to use, copy, modify, and distribute this
+	// software is freely granted, provided that this notice
+	// is preserved.
+	// ====================================================
+	//
+	// __ieee754_log(x)
+	// Return the logarithm of x
+	//
+	// Method :
+	//   1. Argument Reduction: find k and f such that
+	//			x = 2**k * (1+f),
+	//	   where  sqrt(2)/2 < 1+f < sqrt(2) .
+	//
+	//   2. Approximation of log(1+f).
+	//	Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
+	//		 = 2s + 2/3 s**3 + 2/5 s**5 + .....,
+	//	     	 = 2s + s*R
+	//      We use a special Reme algorithm on [0,0.1716] to generate
+	//	a polynomial of degree 14 to approximate R.  The maximum error
+	//	of this polynomial approximation is bounded by 2**-58.45. In
+	//	other words,
+	//		        2      4      6      8      10      12      14
+	//	    R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s  +L6*s  +L7*s
+	//	(the values of L1 to L7 are listed in the program) and
+	//	    |      2          14          |     -58.45
+	//	    | L1*s +...+L7*s    -  R(z) | <= 2
+	//	    |                             |
+	//	Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
+	//	In order to guarantee error in log below 1ulp, we compute log by
+	//		log(1+f) = f - s*(f - R)		(if f is not too large)
+	//		log(1+f) = f - (hfsq - s*(hfsq+R)).	(better accuracy)
+	//
+	//	3. Finally,  log(x) = k*Ln2 + log(1+f).
+	//			    = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo)))
+	//	   Here Ln2 is split into two floating point number:
+	//			Ln2_hi + Ln2_lo,
+	//	   where n*Ln2_hi is always exact for |n| < 2000.
+	//
+	// Special cases:
+	//	log(x) is NaN with signal if x < 0 (including -INF) ;
+	//	log(+INF) is +INF; log(0) is -INF with signal;
+	//	log(NaN) is that NaN with no signal.
+	//
+	// Accuracy:
+	//	according to an error analysis, the error is always less than
+	//	1 ulp (unit in the last place).
+	//
+	// Constants:
+	// The hexadecimal values are the intended ones for the following
+	// constants. The decimal values may be used, provided that the
+	// compiler will convert from decimal to binary accurately enough
+	// to produce the hexadecimal values shown.
+
+	LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01
+	LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10
+	L1     :: 0h3fe55555_55555593 // 6.666666666666735130e-01
+	L2     :: 0h3fd99999_9997fa04 // 3.999999999940941908e-01
+	L3     :: 0h3fd24924_94229359 // 2.857142874366239149e-01
+	L4     :: 0h3fcc71c5_1d8e78af // 2.222219843214978396e-01
+	L5     :: 0h3fc74664_96cb03de // 1.818357216161805012e-01
+	L6     :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01
+	L7     :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01
+
+	switch {
+	case is_nan(x) || is_inf(x, 1):
+		return x
+	case x < 0:
+		return nan_f64()
+	case x == 0:
+		return inf_f64(-1)
+	}
+
+	// reduce
+	f1, ki := frexp(x)
+	if f1 < SQRT_TWO/2 {
+		f1 *= 2
+		ki -= 1
+	}
+	f := f1 - 1
+	k := f64(ki)
+
+	// compute
+	s := f / (2 + f)
+	s2 := s * s
+	s4 := s2 * s2
+	t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7)))
+	t2 := s4 * (L2 + s4*(L4+s4*L6))
+	R := t1 + t2
+	hfsq := 0.5 * f * f
+	return k*LN2_HI - ((hfsq - (s*(hfsq+R) + k*LN2_LO)) - f)
+}
+
+@(require_results) ln_f16   :: proc "contextless" (x: f16)   -> f16   { return #force_inline f16(ln_f64(f64(x))) }
+@(require_results) ln_f32   :: proc "contextless" (x: f32)   -> f32   { return #force_inline f32(ln_f64(f64(x))) }
+@(require_results) ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) }
+@(require_results) ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) }
+@(require_results) ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) }
+@(require_results) ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) }
+@(require_results) ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) }
+@(require_results) ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) }
+ln :: proc{
+	ln_f16, ln_f16le, ln_f16be,
+	ln_f32, ln_f32le, ln_f32be,
+	ln_f64, ln_f64le, ln_f64be,
+}
\ No newline at end of file
diff --git a/core/math/math_pow.odin b/core/math/math_pow.odin
new file mode 100644
index 000000000..c51b662bc
--- /dev/null
+++ b/core/math/math_pow.odin
@@ -0,0 +1,210 @@
+package math
+
+
+// pow returns x**y, the base-x exponential of y.
+//
+// Special cases are (in order):
+//
+//	pow(x, ±0) = 1 for any x
+//	pow(1, y) = 1 for any y
+//	pow(x, 1) = x for any x
+//	pow(NaN, y) = NaN
+//	pow(x, NaN) = NaN
+//	pow(±0, y) = ±Inf for y an odd integer < 0
+//	pow(±0, -Inf) = +Inf
+//	pow(±0, +Inf) = +0
+//	pow(±0, y) = +Inf for finite y < 0 and not an odd integer
+//	pow(±0, y) = ±0 for y an odd integer > 0
+//	pow(±0, y) = +0 for finite y > 0 and not an odd integer
+//	pow(-1, ±Inf) = 1
+//	pow(x, +Inf) = +Inf for |x| > 1
+//	pow(x, -Inf) = +0 for |x| > 1
+//	pow(x, +Inf) = +0 for |x| < 1
+//	pow(x, -Inf) = +Inf for |x| < 1
+//	pow(+Inf, y) = +Inf for y > 0
+//	pow(+Inf, y) = +0 for y < 0
+//	pow(-Inf, y) = pow(-0, -y)
+//	pow(x, y) = NaN for finite x < 0 and finite non-integer y
+//
+// Special cases taken from FreeBSD's /usr/src/lib/msun/src/e_pow.c
+// updated by IEEE Std. 754-2008 "Section 9.2.1 Special values".
+@(require_results)
+pow_f64 :: proc "contextless" (x, y: f64) -> f64 {
+	is_odd_int :: proc "contextless" (x: f64) -> bool {
+		if abs(x) >= (1<<53) {
+			return false
+		}
+
+		i, f := modf(x)
+		return f == 0 && (i64(i)&1 == 1)
+	}
+
+	switch {
+	case y == 0 || x == 1:
+		return 1.0
+	case y == 1:
+		return x
+	case is_nan(x) || is_nan(y):
+		return nan_f64()
+	case x == 0:
+		switch {
+		case y < 0:
+			if signbit(x) && is_odd_int(y) {
+				return inf_f64(-1)
+			}
+			return inf_f64(1)
+		case y > 0:
+			if signbit(x) && is_odd_int(y) {
+				return x
+			}
+			return 0.0
+		}
+	case is_inf(y, 0):
+		switch {
+		case x == -1:
+			return 1.0
+		case (abs(x) < 1) == is_inf(y, 1):
+			return 0.0
+		case:
+			return inf_f64(1)
+		}
+	case is_inf(x, 0):
+		if is_inf(x, -1) {
+			// pow(-0, -y)
+			return pow_f64(1.0/x, -y)
+		}
+		switch {
+		case y < 0:
+			return 0.0
+		case y > 0:
+			return inf_f64(1)
+		}
+	case y == 0.5:
+		return sqrt_f64(x)
+	case y == -0.5:
+		return 1.0 / sqrt_f64(x)
+	}
+
+	yi, yf := modf(abs(y))
+	if yf != 0 && x < 0 {
+		return nan_f64()
+	}
+	if yi >= 1<<63 {
+		// yi is a large even int that will lead to overflow (or underflow to 0)
+		// for all x except -1 (x == 1 was handled earlier)
+		switch {
+		case x == -1:
+			return 1.0
+		case (abs(x) < 1) == (y > 0):
+			return 0.0
+		case:
+			return inf_f64(1)
+		}
+	}
+
+	// ans = a1 * 2**ae (= 1 for now).
+	a1: f64 = 1
+	ae: int = 0
+
+	// ans *= x**yf
+	if yf != 0 {
+		if yf > 0.5 {
+			yf -= 1
+			yi += 1
+		}
+		a1 = exp(yf * ln(x))
+	}
+
+	// ans *= x**yi
+	// by multiplying in successive squarings
+	// of x according to bits of yi.
+	// accumulate powers of two into exp.
+	x1, xe := frexp(x)
+	for i := i64(yi); i != 0; i >>= 1 {
+		if xe < -1<<12 || 1<<12 < xe {
+			// catch xe before it overflows the left shift below
+			// Since i !=0 it has at least one bit still set, so ae will accumulate xe
+			// on at least one more iteration, ae += xe is a lower bound on ae
+			// the lower bound on ae exceeds the size of a f64 exp
+			// so the final call to ldexp will produce under/overflow (0/Inf)
+			ae += xe
+			break
+		}
+		if i&1 == 1 {
+			a1 *= x1
+			ae += xe
+		}
+		x1 *= x1
+		xe <<= 1
+		if x1 < .5 {
+			x1 += x1
+			xe -= 1
+		}
+	}
+
+	// ans = a1*2**ae
+	// if y < 0 { ans = 1 / ans }
+	// but in the opposite order
+	if y < 0 {
+		a1 = 1 / a1
+		ae = -ae
+	}
+	return ldexp(a1, ae)
+}
+
+
+@(require_results) pow_f16 :: proc "contextless" (x, power: f16) -> f16 { return f16(pow_f64(f64(x), f64(power))) }
+@(require_results) pow_f32 :: proc "contextless" (x, power: f32) -> f32 { return f32(pow_f64(f64(x), f64(power))) }
+
+
+
+exp_f64 :: proc "contextless" (x: f64) -> f64 {
+	LN2_HI :: 6.93147180369123816490e-01
+	LN2_LO :: 1.90821492927058770002e-10
+	LOG2_E :: 1.44269504088896338700e+00
+
+	OVERFLOW  :: 7.09782712893383973096e+02
+	UNDERFLOW :: -7.45133219101941108420e+02
+	NEAR_ZERO  :: 1.0 / (1 << 28) // 2**-28
+
+	// special cases
+	switch {
+	case is_nan(x) || is_inf(x, 1):
+		return x
+	case is_inf(x, -1):
+		return 0
+	case x > OVERFLOW:
+		return inf_f64(1)
+	case x < UNDERFLOW:
+		return 0
+	case -NEAR_ZERO < x && x < NEAR_ZERO:
+		return 1 + x
+	}
+
+	// reduce; computed as r = hi - lo for extra precision.
+	k: int
+	switch {
+	case x < 0:
+		k = int(LOG2_E*x - 0.5)
+	case x > 0:
+		k = int(LOG2_E*x + 0.5)
+	}
+	hi := x - f64(k)*LN2_HI
+	lo := f64(k) * LN2_LO
+
+	P1 :: 0h3FC5555555555555 //  1.66666666666666657415e-01
+	P2 :: 0hBF66C16C16BEBD93 // -2.77777777770155933842e-03
+	P3 :: 0h3F11566AAF25DE2C //  6.61375632143793436117e-05
+	P4 :: 0hBEBBBD41C5D26BF1 // -1.65339022054652515390e-06
+	P5 :: 0h3E66376972BEA4D0 //  4.13813679705723846039e-08
+
+	r := hi - lo
+	t := r * r
+	c := r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
+	y := 1 - ((lo - (r*c)/(2-c)) - hi)
+	return ldexp(y, k)
+}
+
+@(require_results) exp_f16 :: proc "contextless" (x: f16) -> f16 { return f16(exp_f64(f64(x))) }
+@(require_results) exp_f32 :: proc "contextless" (x: f32) -> f32 { return f32(exp_f64(f64(x))) }
+
-- 
cgit v1.2.3