From b56a0e0f0346237a3254045ee68e539cfcb11be1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 2 Jun 2024 23:29:43 +0100 Subject: Remove `libm` dependency in `core:math` where possible --- core/math/math.odin | 32 ++++++- core/math/math_basic.odin | 200 --------------------------------------- core/math/math_basic_js.odin | 47 ---------- core/math/math_fmuladd.odin | 218 +++++++++++++++++++++++++++++++++++++++++++ core/math/math_ln.odin | 118 +++++++++++++++++++++++ core/math/math_pow.odin | 210 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 575 insertions(+), 250 deletions(-) delete mode 100644 core/math/math_basic.odin delete mode 100644 core/math/math_basic_js.odin create mode 100644 core/math/math_fmuladd.odin create mode 100644 core/math/math_ln.odin create mode 100644 core/math/math_pow.odin diff --git a/core/math/math.odin b/core/math/math.odin index 8d85c2381..534ef35f7 100644 --- a/core/math/math.odin +++ b/core/math/math.odin @@ -14,10 +14,10 @@ Float_Class :: enum { Neg_Inf, // negative infinity } -TAU :: 6.28318530717958647692528676655900576 -PI :: 3.14159265358979323846264338327950288 +TAU :: 6.28318530717958647692528676655900576 +PI :: 3.14159265358979323846264338327950288 -E :: 2.71828182845904523536 +E :: 2.71828182845904523536 τ :: TAU π :: PI @@ -42,6 +42,32 @@ min :: builtin.min max :: builtin.max clamp :: builtin.clamp + +@(private) +IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 + +@(require_results) +sqrt_f16 :: proc "contextless" (x: f16) -> f16 { + when IS_WASM { + return f16(sqrt_f64(f64(x))) + } else { + return intrinsics.sqrt(x) + } +} +@(require_results) +sqrt_f32 :: proc "contextless" (x: f32) -> f32 { + when IS_WASM { + return f32(sqrt_f64(f64(x))) + } else { + return intrinsics.sqrt(x) + } +} +@(require_results) +sqrt_f64 :: proc "contextless" (x: f64) -> f64 { + return intrinsics.sqrt(x) +} + + @(require_results) sqrt_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(sqrt_f16(f16(x))) } @(require_results) sqrt_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(sqrt_f16(f16(x))) } @(require_results) sqrt_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(sqrt_f32(f32(x))) } diff --git a/core/math/math_basic.odin b/core/math/math_basic.odin deleted file mode 100644 index 2c73232cc..000000000 --- a/core/math/math_basic.odin +++ /dev/null @@ -1,200 +0,0 @@ -//+build !js -package math - -import "base:intrinsics" - -@(default_calling_convention="none", private="file") -foreign _ { - @(link_name="llvm.pow.f16", require_results) - _pow_f16 :: proc(x, power: f16) -> f16 --- - @(link_name="llvm.pow.f32", require_results) - _pow_f32 :: proc(x, power: f32) -> f32 --- - @(link_name="llvm.pow.f64", require_results) - _pow_f64 :: proc(x, power: f64) -> f64 --- - - @(link_name="llvm.fmuladd.f16", require_results) - _fmuladd_f16 :: proc(a, b, c: f16) -> f16 --- - @(link_name="llvm.fmuladd.f32", require_results) - _fmuladd_f32 :: proc(a, b, c: f32) -> f32 --- - @(link_name="llvm.fmuladd.f64", require_results) - _fmuladd_f64 :: proc(a, b, c: f64) -> f64 --- - - @(link_name="llvm.exp.f16", require_results) - _exp_f16 :: proc(x: f16) -> f16 --- - @(link_name="llvm.exp.f32", require_results) - _exp_f32 :: proc(x: f32) -> f32 --- - @(link_name="llvm.exp.f64", require_results) - _exp_f64 :: proc(x: f64) -> f64 --- -} - - -@(require_results) -pow_f16 :: proc "contextless" (x, power: f16) -> f16 { - return _pow_f16(x, power) -} -@(require_results) -pow_f32 :: proc "contextless" (x, power: f32) -> f32 { - return _pow_f32(x, power) -} -@(require_results) -pow_f64 :: proc "contextless" (x, power: f64) -> f64 { - return _pow_f64(x, power) -} - -@(require_results) -fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 { - return _fmuladd_f16(a, b, c) -} -@(require_results) -fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 { - return _fmuladd_f32(a, b, c) -} -@(require_results) -fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 { - return _fmuladd_f64(a, b, c) -} - -@(require_results) -exp_f16 :: proc "contextless" (x: f16) -> f16 { - return _exp_f16(x) -} -@(require_results) -exp_f32 :: proc "contextless" (x: f32) -> f32 { - return _exp_f32(x) -} -@(require_results) -exp_f64 :: proc "contextless" (x: f64) -> f64 { - return _exp_f64(x) -} - - -@(require_results) -sqrt_f16 :: proc "contextless" (x: f16) -> f16 { - return intrinsics.sqrt(x) -} -@(require_results) -sqrt_f32 :: proc "contextless" (x: f32) -> f32 { - return intrinsics.sqrt(x) -} -@(require_results) -sqrt_f64 :: proc "contextless" (x: f64) -> f64 { - return intrinsics.sqrt(x) -} - - - -@(require_results) -ln_f64 :: proc "contextless" (x: f64) -> f64 { - // The original C code, the long comment, and the constants - // below are from FreeBSD's /usr/src/lib/msun/src/e_log.c - // and came with this notice. - // - // ==================================================== - // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - // - // Developed at SunPro, a Sun Microsystems, Inc. business. - // Permission to use, copy, modify, and distribute this - // software is freely granted, provided that this notice - // is preserved. - // ==================================================== - // - // __ieee754_log(x) - // Return the logarithm of x - // - // Method : - // 1. Argument Reduction: find k and f such that - // x = 2**k * (1+f), - // where sqrt(2)/2 < 1+f < sqrt(2) . - // - // 2. Approximation of log(1+f). - // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - // = 2s + 2/3 s**3 + 2/5 s**5 + ....., - // = 2s + s*R - // We use a special Reme algorithm on [0,0.1716] to generate - // a polynomial of degree 14 to approximate R. The maximum error - // of this polynomial approximation is bounded by 2**-58.45. In - // other words, - // 2 4 6 8 10 12 14 - // R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s +L6*s +L7*s - // (the values of L1 to L7 are listed in the program) and - // | 2 14 | -58.45 - // | L1*s +...+L7*s - R(z) | <= 2 - // | | - // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - // In order to guarantee error in log below 1ulp, we compute log by - // log(1+f) = f - s*(f - R) (if f is not too large) - // log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) - // - // 3. Finally, log(x) = k*Ln2 + log(1+f). - // = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) - // Here Ln2 is split into two floating point number: - // Ln2_hi + Ln2_lo, - // where n*Ln2_hi is always exact for |n| < 2000. - // - // Special cases: - // log(x) is NaN with signal if x < 0 (including -INF) ; - // log(+INF) is +INF; log(0) is -INF with signal; - // log(NaN) is that NaN with no signal. - // - // Accuracy: - // according to an error analysis, the error is always less than - // 1 ulp (unit in the last place). - // - // Constants: - // The hexadecimal values are the intended ones for the following - // constants. The decimal values may be used, provided that the - // compiler will convert from decimal to binary accurately enough - // to produce the hexadecimal values shown. - - LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01 - LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10 - L1 :: 0h3fe55555_55555593 // 6.666666666666735130e-01 - L2 :: 0h3fd99999_9997fa04 // 3.999999999940941908e-01 - L3 :: 0h3fd24924_94229359 // 2.857142874366239149e-01 - L4 :: 0h3fcc71c5_1d8e78af // 2.222219843214978396e-01 - L5 :: 0h3fc74664_96cb03de // 1.818357216161805012e-01 - L6 :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01 - L7 :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01 - - switch { - case is_nan(x) || is_inf(x, 1): - return x - case x < 0: - return nan_f64() - case x == 0: - return inf_f64(-1) - } - - // reduce - f1, ki := frexp(x) - if f1 < SQRT_TWO/2 { - f1 *= 2 - ki -= 1 - } - f := f1 - 1 - k := f64(ki) - - // compute - s := f / (2 + f) - s2 := s * s - s4 := s2 * s2 - t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) - t2 := s4 * (L2 + s4*(L4+s4*L6)) - R := t1 + t2 - hfsq := 0.5 * f * f - return k*LN2_HI - ((hfsq - (s*(hfsq+R) + k*LN2_LO)) - f) -} - -@(require_results) ln_f16 :: proc "contextless" (x: f16) -> f16 { return #force_inline f16(ln_f64(f64(x))) } -@(require_results) ln_f32 :: proc "contextless" (x: f32) -> f32 { return #force_inline f32(ln_f64(f64(x))) } -@(require_results) ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) } -@(require_results) ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) } -@(require_results) ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) } -@(require_results) ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) } -@(require_results) ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) } -@(require_results) ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) } -ln :: proc{ - ln_f16, ln_f16le, ln_f16be, - ln_f32, ln_f32le, ln_f32be, - ln_f64, ln_f64le, ln_f64be, -} \ No newline at end of file diff --git a/core/math/math_basic_js.odin b/core/math/math_basic_js.odin deleted file mode 100644 index df68da1b8..000000000 --- a/core/math/math_basic_js.odin +++ /dev/null @@ -1,47 +0,0 @@ -//+build js -package math - -import "base:intrinsics" - -foreign import "odin_env" - -@(default_calling_convention="c") -foreign odin_env { - @(link_name="pow", require_results) - pow_f64 :: proc(x, power: f64) -> f64 --- - @(link_name="fmuladd", require_results) - fmuladd_f64 :: proc(a, b, c: f64) -> f64 --- - @(link_name="ln", require_results) - ln_f64 :: proc(x: f64) -> f64 --- - @(link_name="exp", require_results) - exp_f64 :: proc(x: f64) -> f64 --- -} - -@(require_results) -sqrt_f64 :: proc "contextless" (x: f64) -> f64 { - return intrinsics.sqrt(x) -} - -@(require_results) sqrt_f16 :: proc "c" (x: f16) -> f16 { return f16(sqrt_f64(f64(x))) } -@(require_results) pow_f16 :: proc "c" (x, power: f16) -> f16 { return f16(pow_f64(f64(x), f64(power))) } -@(require_results) fmuladd_f16 :: proc "c" (a, b, c: f16) -> f16 { return f16(fmuladd_f64(f64(a), f64(a), f64(c))) } -@(require_results) ln_f16 :: proc "c" (x: f16) -> f16 { return f16(ln_f64(f64(x))) } -@(require_results) exp_f16 :: proc "c" (x: f16) -> f16 { return f16(exp_f64(f64(x))) } - -@(require_results) sqrt_f32 :: proc "c" (x: f32) -> f32 { return f32(sqrt_f64(f64(x))) } -@(require_results) pow_f32 :: proc "c" (x, power: f32) -> f32 { return f32(pow_f64(f64(x), f64(power))) } -@(require_results) fmuladd_f32 :: proc "c" (a, b, c: f32) -> f32 { return f32(fmuladd_f64(f64(a), f64(a), f64(c))) } -@(require_results) ln_f32 :: proc "c" (x: f32) -> f32 { return f32(ln_f64(f64(x))) } -@(require_results) exp_f32 :: proc "c" (x: f32) -> f32 { return f32(exp_f64(f64(x))) } - -@(require_results) ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) } -@(require_results) ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) } -@(require_results) ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) } -@(require_results) ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) } -@(require_results) ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) } -@(require_results) ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) } -ln :: proc{ - ln_f16, ln_f16le, ln_f16be, - ln_f32, ln_f32le, ln_f32be, - ln_f64, ln_f64le, ln_f64be, -} diff --git a/core/math/math_fmuladd.odin b/core/math/math_fmuladd.odin new file mode 100644 index 000000000..632502454 --- /dev/null +++ b/core/math/math_fmuladd.odin @@ -0,0 +1,218 @@ +package math + +import "base:intrinsics" +_ :: intrinsics + +@(require_results) +fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 { + when IS_WASM { + return f16(fmuladd_f64(f64(a), f64(b), f64(c))) + } else { + foreign _ { + @(link_name="llvm.fmuladd.f16", require_results) + _fmuladd_f16 :: proc "none" (a, b, c: f16) -> f16 --- + } + + return _fmuladd_f16(a, b, c) + } +} +@(require_results) +fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 { + when IS_WASM { + return f32(fmuladd_f64(f64(a), f64(b), f64(c))) + } else { + foreign _ { + @(link_name="llvm.fmuladd.f32", require_results) + _fmuladd_f32 :: proc "none" (a, b, c: f32) -> f32 --- + } + + return _fmuladd_f32(a, b, c) + } +} +@(require_results) +fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 { + when IS_WASM { + return #force_inline fmuladd_slow_f64(a, b, c) + } else { + foreign _ { + @(link_name="llvm.fmuladd.f64", require_results) + _fmuladd_f64 :: proc "none" (a, b, c: f64) -> f64 --- + } + + return _fmuladd_f64(a, b, c) + } +} + + +@(require_results) +fmuladd_slow_f64 :: proc "contextless" (x, y, z: f64) -> f64 { + @(require_results) + split :: proc "contextless" (b: u64) -> (sign: u32, exp: i32, mantissa: u64) { + MASK :: 0x7FF + FRAC_MASK :: 1<<52 - 1 + + sign = u32(b >> 63) + exp = i32(b>>52) & MASK + mantissa = b & FRAC_MASK + + if exp == 0 { + shift := uint(intrinsics.count_leading_zeros(mantissa) - 11) + mantissa <<= shift + exp = 1 - i32(shift) + } else { + mantissa |= 1<<52 + } + return + } + + @(require_results) + mul_u64 :: proc "contextless" (x, y: u64) -> (hi, lo: u64) { + prod_wide := u128(x) * u128(y) + hi, lo = u64(prod_wide>>64), u64(prod_wide) + return + } + + @(require_results) + add_u64 :: proc "contextless" (x, y, carry: u64) -> (sum, carry_out: u64) { + tmp_carry, tmp_carry2: bool + sum, tmp_carry = intrinsics.overflow_add(x, y) + sum, tmp_carry2 = intrinsics.overflow_add(sum, carry) + carry_out = u64(tmp_carry | tmp_carry2) + return + } + + @(require_results) + sub_u64 :: proc "contextless" (x, y, borrow: u64) -> (diff, borrow_out: u64) { + tmp_borrow, tmp_borrow2: bool + diff, tmp_borrow = intrinsics.overflow_sub(x, y) + diff, tmp_borrow2 = intrinsics.overflow_sub(diff, borrow) + borrow_out = u64(tmp_borrow | tmp_borrow2) + return + } + + @(require_results) + nonzero :: proc "contextless" (x: u64) -> u64 { + return 1 if x != 0 else 0 + } + + @(require_results) + zero :: proc "contextless" (x: u64) -> u64 { + return 1 if x == 0 else 0 + } + + @(require_results) + shl :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) { + r1 = u1<>(64-n) | u2<<(n-64) + r2 = u2< (r1, r2: u64) { + r2 = u2>>n | u1<<(64-n) | u1>>(n-64) + r1 = u1>>n + return + } + + @(require_results) + lz :: proc "contextless" (u1, u2: u64) -> (l: i32) { + l = i32(intrinsics.count_leading_zeros(u1)) + if l == 64 { + l += i32(intrinsics.count_leading_zeros(u2)) + } + return l + } + + @(require_results) + shrcompress :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) { + switch { + case n == 0: + return u1, u2 + case n == 64: + return 0, u1 | nonzero(u2) + case n >= 128: + return 0, nonzero(u1 | u2) + case n < 64: + r1, r2 = shr(u1, u2, n) + r2 |= nonzero(u2 & (1<> 62) & 1) + pm1, pm2 = shl(pm1, pm2, is_62_zero) + pe -= i32(is_62_zero) + + if pe < ze || pe == ze && pm1 < zm1 { + // Swap addition operands so |p| >= |z| + ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2 + } + + if ps != zs && pe == ze && pm1 == zm1 && pm2 == zm2 { + return 0 + } + + zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze)) + + // Compute resulting significands, normalizing if necessary. + m, c: u64 + if ps == zs { + // Adding (pm1:pm2) + (zm1:zm2) + pm2, c = add_u64(pm2, zm2, 0) + pm1, _ = add_u64(pm1, zm1, c) + pe -= i32(~pm1 >> 63) + pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63)) + } else { + // Subtracting (pm1:pm2) - (zm1:zm2) + pm2, c = sub_u64(pm2, zm2, 0) + pm1, _ = sub_u64(pm1, zm1, c) + nz := lz(pm1, pm2) + pe -= nz + m, pm2 = shl(pm1, pm2, uint(nz-1)) + m |= nonzero(pm2) + } + + // Round and break ties to even + if pe > 1022+BIAS || pe == 1022+BIAS && (m+1<<9)>>63 == 1 { + // rounded value overflows exponent range + return transmute(f64)(u64(ps)<<63 | UVINF) + } + if pe < 0 { + n := uint(-pe) + m = m>>n | nonzero(m&(1<> 10) & ~zero((m&(1<<10-1))~1<<9) + pe &= -i32(nonzero(m)) + return transmute(f64)(u64(ps)<<63 + u64(pe)<<52 + m) +} \ No newline at end of file diff --git a/core/math/math_ln.odin b/core/math/math_ln.odin new file mode 100644 index 000000000..ad1016692 --- /dev/null +++ b/core/math/math_ln.odin @@ -0,0 +1,118 @@ +package math + + +@(require_results) +ln_f64 :: proc "contextless" (x: f64) -> f64 { + // The original C code, the long comment, and the constants + // below are from FreeBSD's /usr/src/lib/msun/src/e_log.c + // and came with this notice. + // + // ==================================================== + // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + // + // Developed at SunPro, a Sun Microsystems, Inc. business. + // Permission to use, copy, modify, and distribute this + // software is freely granted, provided that this notice + // is preserved. + // ==================================================== + // + // __ieee754_log(x) + // Return the logarithm of x + // + // Method : + // 1. Argument Reduction: find k and f such that + // x = 2**k * (1+f), + // where sqrt(2)/2 < 1+f < sqrt(2) . + // + // 2. Approximation of log(1+f). + // Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + // = 2s + 2/3 s**3 + 2/5 s**5 + ....., + // = 2s + s*R + // We use a special Reme algorithm on [0,0.1716] to generate + // a polynomial of degree 14 to approximate R. The maximum error + // of this polynomial approximation is bounded by 2**-58.45. In + // other words, + // 2 4 6 8 10 12 14 + // R(z) ~ L1*s +L2*s +L3*s +L4*s +L5*s +L6*s +L7*s + // (the values of L1 to L7 are listed in the program) and + // | 2 14 | -58.45 + // | L1*s +...+L7*s - R(z) | <= 2 + // | | + // Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + // In order to guarantee error in log below 1ulp, we compute log by + // log(1+f) = f - s*(f - R) (if f is not too large) + // log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) + // + // 3. Finally, log(x) = k*Ln2 + log(1+f). + // = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) + // Here Ln2 is split into two floating point number: + // Ln2_hi + Ln2_lo, + // where n*Ln2_hi is always exact for |n| < 2000. + // + // Special cases: + // log(x) is NaN with signal if x < 0 (including -INF) ; + // log(+INF) is +INF; log(0) is -INF with signal; + // log(NaN) is that NaN with no signal. + // + // Accuracy: + // according to an error analysis, the error is always less than + // 1 ulp (unit in the last place). + // + // Constants: + // The hexadecimal values are the intended ones for the following + // constants. The decimal values may be used, provided that the + // compiler will convert from decimal to binary accurately enough + // to produce the hexadecimal values shown. + + LN2_HI :: 0h3fe62e42_fee00000 // 6.93147180369123816490e-01 + LN2_LO :: 0h3dea39ef_35793c76 // 1.90821492927058770002e-10 + L1 :: 0h3fe55555_55555593 // 6.666666666666735130e-01 + L2 :: 0h3fd99999_9997fa04 // 3.999999999940941908e-01 + L3 :: 0h3fd24924_94229359 // 2.857142874366239149e-01 + L4 :: 0h3fcc71c5_1d8e78af // 2.222219843214978396e-01 + L5 :: 0h3fc74664_96cb03de // 1.818357216161805012e-01 + L6 :: 0h3fc39a09_d078c69f // 1.531383769920937332e-01 + L7 :: 0h3fc2f112_df3e5244 // 1.479819860511658591e-01 + + switch { + case is_nan(x) || is_inf(x, 1): + return x + case x < 0: + return nan_f64() + case x == 0: + return inf_f64(-1) + } + + // reduce + f1, ki := frexp(x) + if f1 < SQRT_TWO/2 { + f1 *= 2 + ki -= 1 + } + f := f1 - 1 + k := f64(ki) + + // compute + s := f / (2 + f) + s2 := s * s + s4 := s2 * s2 + t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) + t2 := s4 * (L2 + s4*(L4+s4*L6)) + R := t1 + t2 + hfsq := 0.5 * f * f + return k*LN2_HI - ((hfsq - (s*(hfsq+R) + k*LN2_LO)) - f) +} + +@(require_results) ln_f16 :: proc "contextless" (x: f16) -> f16 { return #force_inline f16(ln_f64(f64(x))) } +@(require_results) ln_f32 :: proc "contextless" (x: f32) -> f32 { return #force_inline f32(ln_f64(f64(x))) } +@(require_results) ln_f16le :: proc "contextless" (x: f16le) -> f16le { return #force_inline f16le(ln_f64(f64(x))) } +@(require_results) ln_f16be :: proc "contextless" (x: f16be) -> f16be { return #force_inline f16be(ln_f64(f64(x))) } +@(require_results) ln_f32le :: proc "contextless" (x: f32le) -> f32le { return #force_inline f32le(ln_f64(f64(x))) } +@(require_results) ln_f32be :: proc "contextless" (x: f32be) -> f32be { return #force_inline f32be(ln_f64(f64(x))) } +@(require_results) ln_f64le :: proc "contextless" (x: f64le) -> f64le { return #force_inline f64le(ln_f64(f64(x))) } +@(require_results) ln_f64be :: proc "contextless" (x: f64be) -> f64be { return #force_inline f64be(ln_f64(f64(x))) } +ln :: proc{ + ln_f16, ln_f16le, ln_f16be, + ln_f32, ln_f32le, ln_f32be, + ln_f64, ln_f64le, ln_f64be, +} \ No newline at end of file diff --git a/core/math/math_pow.odin b/core/math/math_pow.odin new file mode 100644 index 000000000..c51b662bc --- /dev/null +++ b/core/math/math_pow.odin @@ -0,0 +1,210 @@ +package math + + +// pow returns x**y, the base-x exponential of y. +// +// Special cases are (in order): +// +// pow(x, ±0) = 1 for any x +// pow(1, y) = 1 for any y +// pow(x, 1) = x for any x +// pow(NaN, y) = NaN +// pow(x, NaN) = NaN +// pow(±0, y) = ±Inf for y an odd integer < 0 +// pow(±0, -Inf) = +Inf +// pow(±0, +Inf) = +0 +// pow(±0, y) = +Inf for finite y < 0 and not an odd integer +// pow(±0, y) = ±0 for y an odd integer > 0 +// pow(±0, y) = +0 for finite y > 0 and not an odd integer +// pow(-1, ±Inf) = 1 +// pow(x, +Inf) = +Inf for |x| > 1 +// pow(x, -Inf) = +0 for |x| > 1 +// pow(x, +Inf) = +0 for |x| < 1 +// pow(x, -Inf) = +Inf for |x| < 1 +// pow(+Inf, y) = +Inf for y > 0 +// pow(+Inf, y) = +0 for y < 0 +// pow(-Inf, y) = pow(-0, -y) +// pow(x, y) = NaN for finite x < 0 and finite non-integer y +// +// Special cases taken from FreeBSD's /usr/src/lib/msun/src/e_pow.c +// updated by IEEE Std. 754-2008 "Section 9.2.1 Special values". +@(require_results) +pow_f64 :: proc "contextless" (x, y: f64) -> f64 { + is_odd_int :: proc "contextless" (x: f64) -> bool { + if abs(x) >= (1<<53) { + return false + } + + i, f := modf(x) + return f == 0 && (i64(i)&1 == 1) + } + + switch { + case y == 0 || x == 1: + return 1.0 + case y == 1: + return x + case is_nan(x) || is_nan(y): + return nan_f64() + case x == 0: + switch { + case y < 0: + if signbit(x) && is_odd_int(y) { + return inf_f64(-1) + } + return inf_f64(1) + case y > 0: + if signbit(x) && is_odd_int(y) { + return x + } + return 0.0 + } + case is_inf(y, 0): + switch { + case x == -1: + return 1.0 + case (abs(x) < 1) == is_inf(y, 1): + return 0.0 + case: + return inf_f64(1) + } + case is_inf(x, 0): + if is_inf(x, -1) { + // pow(-0, -y) + return pow_f64(1.0/x, -y) + } + switch { + case y < 0: + return 0.0 + case y > 0: + return inf_f64(1) + } + case y == 0.5: + return sqrt_f64(x) + case y == -0.5: + return 1.0 / sqrt_f64(x) + } + + yi, yf := modf(abs(y)) + if yf != 0 && x < 0 { + return nan_f64() + } + if yi >= 1<<63 { + // yi is a large even int that will lead to overflow (or underflow to 0) + // for all x except -1 (x == 1 was handled earlier) + switch { + case x == -1: + return 1.0 + case (abs(x) < 1) == (y > 0): + return 0.0 + case: + return inf_f64(1) + } + } + + // ans = a1 * 2**ae (= 1 for now). + a1: f64 = 1 + ae: int = 0 + + // ans *= x**yf + if yf != 0 { + if yf > 0.5 { + yf -= 1 + yi += 1 + } + a1 = exp(yf * ln(x)) + } + + // ans *= x**yi + // by multiplying in successive squarings + // of x according to bits of yi. + // accumulate powers of two into exp. + x1, xe := frexp(x) + for i := i64(yi); i != 0; i >>= 1 { + if xe < -1<<12 || 1<<12 < xe { + // catch xe before it overflows the left shift below + // Since i !=0 it has at least one bit still set, so ae will accumulate xe + // on at least one more iteration, ae += xe is a lower bound on ae + // the lower bound on ae exceeds the size of a f64 exp + // so the final call to ldexp will produce under/overflow (0/Inf) + ae += xe + break + } + if i&1 == 1 { + a1 *= x1 + ae += xe + } + x1 *= x1 + xe <<= 1 + if x1 < .5 { + x1 += x1 + xe -= 1 + } + } + + // ans = a1*2**ae + // if y < 0 { ans = 1 / ans } + // but in the opposite order + if y < 0 { + a1 = 1 / a1 + ae = -ae + } + return ldexp(a1, ae) +} + + +@(require_results) pow_f16 :: proc "contextless" (x, power: f16) -> f16 { return f16(pow_f64(f64(x), f64(power))) } +@(require_results) pow_f32 :: proc "contextless" (x, power: f32) -> f32 { return f32(pow_f64(f64(x), f64(power))) } + + + +exp_f64 :: proc "contextless" (x: f64) -> f64 { + LN2_HI :: 6.93147180369123816490e-01 + LN2_LO :: 1.90821492927058770002e-10 + LOG2_E :: 1.44269504088896338700e+00 + + OVERFLOW :: 7.09782712893383973096e+02 + UNDERFLOW :: -7.45133219101941108420e+02 + NEAR_ZERO :: 1.0 / (1 << 28) // 2**-28 + + // special cases + switch { + case is_nan(x) || is_inf(x, 1): + return x + case is_inf(x, -1): + return 0 + case x > OVERFLOW: + return inf_f64(1) + case x < UNDERFLOW: + return 0 + case -NEAR_ZERO < x && x < NEAR_ZERO: + return 1 + x + } + + // reduce; computed as r = hi - lo for extra precision. + k: int + switch { + case x < 0: + k = int(LOG2_E*x - 0.5) + case x > 0: + k = int(LOG2_E*x + 0.5) + } + hi := x - f64(k)*LN2_HI + lo := f64(k) * LN2_LO + + P1 :: 0h3FC5555555555555 // 1.66666666666666657415e-01 + P2 :: 0hBF66C16C16BEBD93 // -2.77777777770155933842e-03 + P3 :: 0h3F11566AAF25DE2C // 6.61375632143793436117e-05 + P4 :: 0hBEBBBD41C5D26BF1 // -1.65339022054652515390e-06 + P5 :: 0h3E66376972BEA4D0 // 4.13813679705723846039e-08 + + r := hi - lo + t := r * r + c := r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))) + y := 1 - ((lo - (r*c)/(2-c)) - hi) + return ldexp(y, k) +} + +@(require_results) exp_f16 :: proc "contextless" (x: f16) -> f16 { return f16(exp_f64(f64(x))) } +@(require_results) exp_f32 :: proc "contextless" (x: f32) -> f32 { return f32(exp_f64(f64(x))) } + -- cgit v1.2.3