diff options
| author | ftphikari <ftphikari@gmail.com> | 2023-07-25 15:32:18 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-07-25 15:32:18 +0300 |
| commit | 699aec331d44da58bceddfb788bf349995473ad9 (patch) | |
| tree | 3f5ce42c72c18fff1fc79f0229797be72f0e7638 /core/math | |
| parent | d2375a79f29d8377c813484bce3127ae9c205974 (diff) | |
| parent | 5ac7fe453f5fbf0995c24f0c1c12ed439ae3aee9 (diff) | |
Merge branch 'odin-lang:master' into master
Diffstat (limited to 'core/math')
| -rw-r--r-- | core/math/big/helpers.odin | 6 | ||||
| -rw-r--r-- | core/math/big/internal.odin | 4 | ||||
| -rw-r--r-- | core/math/big/radix.odin | 2 | ||||
| -rw-r--r-- | core/math/big/rat.odin | 2 | ||||
| -rw-r--r-- | core/math/cmplx/cmplx.odin | 513 | ||||
| -rw-r--r-- | core/math/cmplx/cmplx_invtrig.odin | 273 | ||||
| -rw-r--r-- | core/math/cmplx/cmplx_trig.odin | 409 | ||||
| -rw-r--r-- | core/math/ease/ease.odin | 2 | ||||
| -rw-r--r-- | core/math/math.odin | 74 | ||||
| -rw-r--r-- | core/math/math_basic.odin | 98 | ||||
| -rw-r--r-- | core/math/math_sincos.odin | 308 |
11 files changed, 1667 insertions, 24 deletions
diff --git a/core/math/big/helpers.odin b/core/math/big/helpers.odin index 6c4b5dd01..a4313a244 100644 --- a/core/math/big/helpers.odin +++ b/core/math/big/helpers.odin @@ -19,7 +19,7 @@ import rnd "core:math/rand" int_destroy :: proc(integers: ..^Int) { integers := integers - for a in &integers { + for a in integers { assert_if_nil(a) } #force_inline internal_int_destroy(..integers) @@ -408,7 +408,7 @@ clear_if_uninitialized_multi :: proc(args: ..^Int, allocator := context.allocato args := args assert_if_nil(..args) - for i in &args { + for i in args { #force_inline internal_clear_if_uninitialized_single(i, allocator) or_return } return err @@ -435,7 +435,7 @@ int_init_multi :: proc(integers: ..^Int, allocator := context.allocator) -> (err assert_if_nil(..integers) integers := integers - for a in &integers { + for a in integers { #force_inline internal_clear(a, true, allocator) or_return } return nil diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin index 13aa96bef..968a26f8f 100644 --- a/core/math/big/internal.odin +++ b/core/math/big/internal.odin @@ -1857,7 +1857,7 @@ internal_root_n :: proc { internal_int_root_n, } internal_int_destroy :: proc(integers: ..^Int) { integers := integers - for a in &integers { + for &a in integers { if internal_int_allocated_cap(a) > 0 { mem.zero_slice(a.digit[:]) free(&a.digit[0]) @@ -2909,7 +2909,7 @@ internal_int_init_multi :: proc(integers: ..^Int, allocator := context.allocator context.allocator = allocator integers := integers - for a in &integers { + for a in integers { internal_clear(a) or_return } return nil diff --git a/core/math/big/radix.odin b/core/math/big/radix.odin index 2b758dc35..d15ce0e98 100644 --- a/core/math/big/radix.odin +++ b/core/math/big/radix.odin @@ -429,7 +429,7 @@ internal_int_write_to_ascii_file :: proc(a: ^Int, filename: string, radix := i8( len = l, } - ok := os.write_entire_file(name=filename, data=data, truncate=true) + ok := os.write_entire_file(filename, data, 
truncate=true) return nil if ok else .Cannot_Write_File } diff --git a/core/math/big/rat.odin b/core/math/big/rat.odin index c3efc30aa..35618affb 100644 --- a/core/math/big/rat.odin +++ b/core/math/big/rat.odin @@ -137,7 +137,7 @@ rat_copy :: proc(dst, src: ^Rat, minimize := false, allocator := context.allocat internal_rat_destroy :: proc(rationals: ..^Rat) { rationals := rationals - for z in &rationals { + for &z in rationals { internal_int_destroy(&z.a, &z.b) } } diff --git a/core/math/cmplx/cmplx.odin b/core/math/cmplx/cmplx.odin new file mode 100644 index 000000000..c029be30c --- /dev/null +++ b/core/math/cmplx/cmplx.odin @@ -0,0 +1,513 @@ +package math_cmplx + +import "core:builtin" +import "core:math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. 
Moshier +// moshier@na-net.ornl.gov + +abs :: builtin.abs +conj :: builtin.conj +real :: builtin.real +imag :: builtin.imag +jmag :: builtin.jmag +kmag :: builtin.kmag + + +sin :: proc{ + sin_complex128, +} +cos :: proc{ + cos_complex128, +} +tan :: proc{ + tan_complex128, +} +cot :: proc{ + cot_complex128, +} + + +sinh :: proc{ + sinh_complex128, +} +cosh :: proc{ + cosh_complex128, +} +tanh :: proc{ + tanh_complex128, +} + + + +// sqrt returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). +sqrt :: proc{ + sqrt_complex32, + sqrt_complex64, + sqrt_complex128, +} +ln :: proc{ + ln_complex32, + ln_complex64, + ln_complex128, +} +log10 :: proc{ + log10_complex32, + log10_complex64, + log10_complex128, +} + +exp :: proc{ + exp_complex32, + exp_complex64, + exp_complex128, +} + +pow :: proc{ + pow_complex32, + pow_complex64, + pow_complex128, +} + +phase :: proc{ + phase_complex32, + phase_complex64, + phase_complex128, +} + +polar :: proc{ + polar_complex32, + polar_complex64, + polar_complex128, +} + +is_inf :: proc{ + is_inf_complex32, + is_inf_complex64, + is_inf_complex128, +} + +is_nan :: proc{ + is_nan_complex32, + is_nan_complex64, + is_nan_complex128, +} + + + +// sqrt_complex32 returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). +sqrt_complex32 :: proc "contextless" (x: complex32) -> complex32 { + return complex32(sqrt_complex128(complex128(x))) +} + +// sqrt_complex64 returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). +sqrt_complex64 :: proc "contextless" (x: complex64) -> complex64 { + return complex64(sqrt_complex128(complex128(x))) +} + + +// sqrt_complex128 returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). 
+sqrt_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // The original C code, the long comment, and the constants + // below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. + // The go code is a simplified version of the original C. + // + // Cephes Math Library Release 2.8: June, 2000 + // Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier + // + // The readme file at http://netlib.sandia.gov/cephes/ says: + // Some software in this archive may be from the book _Methods and + // Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster + // International, 1989) or from the Cephes Mathematical Library, a + // commercial product. In either event, it is copyrighted by the author. + // What you see here may be used freely but it comes with no support or + // guarantee. + // + // The two known misprints in the book are repaired here in the + // source listings for the gamma function and the incomplete beta + // integral. + // + // Stephen L. Moshier + // moshier@na-net.ornl.gov + + // Complex square root + // + // DESCRIPTION: + // + // If z = x + iy, r = |z|, then + // + // 1/2 + // Re w = [ (r + x)/2 ] , + // + // 1/2 + // Im w = [ (r - x)/2 ] . + // + // Cancellation error in r-x or r+x is avoided by using the + // identity 2 Re w Im w = y. + // + // Note that -w is also a square root of z. The root chosen + // is always in the right half plane and Im w has the same sign as y. + // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // DEC -10,+10 25000 3.2e-17 9.6e-18 + // IEEE -10,+10 1,000,000 2.9e-16 6.1e-17 + + if imag(x) == 0 { + // Ensure that imag(r) has the same sign as imag(x) for imag(x) == signed zero. 
+ if real(x) == 0 { + return complex(0, imag(x)) + } + if real(x) < 0 { + return complex(0, math.copy_sign(math.sqrt(-real(x)), imag(x))) + } + return complex(math.sqrt(real(x)), imag(x)) + } else if math.is_inf(imag(x), 0) { + return complex(math.inf_f64(1.0), imag(x)) + } + if real(x) == 0 { + if imag(x) < 0 { + r := math.sqrt(-0.5 * imag(x)) + return complex(r, -r) + } + r := math.sqrt(0.5 * imag(x)) + return complex(r, r) + } + a := real(x) + b := imag(x) + scale: f64 + // Rescale to avoid internal overflow or underflow. + if abs(a) > 4 || abs(b) > 4 { + a *= 0.25 + b *= 0.25 + scale = 2 + } else { + a *= 1.8014398509481984e16 // 2**54 + b *= 1.8014398509481984e16 + scale = 7.450580596923828125e-9 // 2**-27 + } + r := math.hypot(a, b) + t: f64 + if a > 0 { + t = math.sqrt(0.5*r + 0.5*a) + r = scale * abs((0.5*b)/t) + t *= scale + } else { + r = math.sqrt(0.5*r - 0.5*a) + t = scale * abs((0.5*b)/r) + r *= scale + } + if b < 0 { + return complex(t, -r) + } + return complex(t, r) +} + +ln_complex32 :: proc "contextless" (x: complex32) -> complex32 { + return complex(math.ln(abs(x)), phase(x)) +} +ln_complex64 :: proc "contextless" (x: complex64) -> complex64 { + return complex(math.ln(abs(x)), phase(x)) +} +ln_complex128 :: proc "contextless" (x: complex128) -> complex128 { + return complex(math.ln(abs(x)), phase(x)) +} + + +exp_complex32 :: proc "contextless" (x: complex32) -> complex32 { // e**x for complex32; infinite/NaN real parts are special-cased first + switch re, im := real(x), imag(x); { + case math.is_inf(re, 0): + switch { + case re > 0 && im == 0: + return x + case math.is_inf(im, 0) || math.is_nan(im): + if re < 0 { + return complex(0, math.copy_sign(0, im)) + } else { + return complex(math.inf_f16(1.0), math.nan_f16()) // f16 parts: complex() of two f64 values would be complex128, not complex32 + } + } + case math.is_nan(re): + if im == 0 { + return complex(math.nan_f16(), im) + } + } + r := math.exp(real(x)) + s, c := math.sincos(imag(x)) + return complex(r*c, r*s) // exp(re) * (cos(im) + i*sin(im)) +} +exp_complex64 :: proc "contextless" (x: complex64) -> complex64 { // e**x for complex64; infinite/NaN real parts are special-cased first + switch re, im := real(x), imag(x); { + case
math.is_inf(re, 0): + switch { + case re > 0 && im == 0: + return x + case math.is_inf(im, 0) || math.is_nan(im): + if re < 0 { + return complex(0, math.copy_sign(0, im)) + } else { + return complex(math.inf_f32(1.0), math.nan_f32()) // f32 parts: complex() of two f64 values would be complex128, not complex64 + } + } + case math.is_nan(re): + if im == 0 { + return complex(math.nan_f32(), im) + } + } + r := math.exp(real(x)) + s, c := math.sincos(imag(x)) + return complex(r*c, r*s) // exp(re) * (cos(im) + i*sin(im)) +} +exp_complex128 :: proc "contextless" (x: complex128) -> complex128 { + switch re, im := real(x), imag(x); { + case math.is_inf(re, 0): + switch { + case re > 0 && im == 0: + return x + case math.is_inf(im, 0) || math.is_nan(im): + if re < 0 { + return complex(0, math.copy_sign(0, im)) + } else { + return complex(math.inf_f64(1.0), math.nan_f64()) + } + } + case math.is_nan(re): + if im == 0 { + return complex(math.nan_f64(), im) + } + } + r := math.exp(real(x)) + s, c := math.sincos(imag(x)) + return complex(r*c, r*s) +} + + +pow_complex32 :: proc "contextless" (x, y: complex32) -> complex32 { + if x == 0 { // Guaranteed also true for x == -0. + if is_nan(y) { + return nan_complex32() + } + r, i := real(y), imag(y) + switch { + case r == 0: + return 1 + case r < 0: + if i == 0 { + return complex(math.inf_f16(1), 0) + } + return inf_complex32() + case r > 0: + return 0 + } + unreachable() + } + modulus := abs(x) + if modulus == 0 { + return complex(0, 0) + } + r := math.pow(modulus, real(y)) + arg := phase(x) + theta := real(y) * arg + if imag(y) != 0 { + r *= math.exp(-imag(y) * arg) + theta += imag(y) * math.ln(modulus) + } + s, c := math.sincos(theta) + return complex(r*c, r*s) +} +pow_complex64 :: proc "contextless" (x, y: complex64) -> complex64 { + if x == 0 { // Guaranteed also true for x == -0. 
+ if is_nan(y) { + return nan_complex64() + } + r, i := real(y), imag(y) + switch { + case r == 0: + return 1 + case r < 0: + if i == 0 { + return complex(math.inf_f32(1), 0) + } + return inf_complex64() + case r > 0: + return 0 + } + unreachable() + } + modulus := abs(x) + if modulus == 0 { + return complex(0, 0) + } + r := math.pow(modulus, real(y)) + arg := phase(x) + theta := real(y) * arg + if imag(y) != 0 { + r *= math.exp(-imag(y) * arg) + theta += imag(y) * math.ln(modulus) + } + s, c := math.sincos(theta) + return complex(r*c, r*s) +} +pow_complex128 :: proc "contextless" (x, y: complex128) -> complex128 { + if x == 0 { // Guaranteed also true for x == -0. + if is_nan(y) { + return nan_complex128() + } + r, i := real(y), imag(y) + switch { + case r == 0: + return 1 + case r < 0: + if i == 0 { + return complex(math.inf_f64(1), 0) + } + return inf_complex128() + case r > 0: + return 0 + } + unreachable() + } + modulus := abs(x) + if modulus == 0 { + return complex(0, 0) + } + r := math.pow(modulus, real(y)) + arg := phase(x) + theta := real(y) * arg + if imag(y) != 0 { + r *= math.exp(-imag(y) * arg) + theta += imag(y) * math.ln(modulus) + } + s, c := math.sincos(theta) + return complex(r*c, r*s) +} + + + +log10_complex32 :: proc "contextless" (x: complex32) -> complex32 { // base-10 logarithm of x + return (1.0/math.LN10)*ln(x) // log10(x) = ln(x)/ln(10); multiplying by LN10 was wrong +} +log10_complex64 :: proc "contextless" (x: complex64) -> complex64 { // base-10 logarithm of x + return (1.0/math.LN10)*ln(x) // log10(x) = ln(x)/ln(10); multiplying by LN10 was wrong +} +log10_complex128 :: proc "contextless" (x: complex128) -> complex128 { // base-10 logarithm of x + return (1.0/math.LN10)*ln(x) // log10(x) = ln(x)/ln(10); multiplying by LN10 was wrong +} + + +phase_complex32 :: proc "contextless" (x: complex32) -> f16 { + return math.atan2(imag(x), real(x)) +} +phase_complex64 :: proc "contextless" (x: complex64) -> f32 { + return math.atan2(imag(x), real(x)) +} +phase_complex128 :: proc "contextless" (x: complex128) -> f64 { + return math.atan2(imag(x), real(x)) +} + + +rect_complex32 :: proc "contextless" (r, θ: f16) -> complex32 { + s, c := math.sincos(θ) + return complex(r*c, r*s) +} +rect_complex64 :: proc 
"contextless" (r, θ: f32) -> complex64 { + s, c := math.sincos(θ) + return complex(r*c, r*s) +} +rect_complex128 :: proc "contextless" (r, θ: f64) -> complex128 { + s, c := math.sincos(θ) + return complex(r*c, r*s) +} + +polar_complex32 :: proc "contextless" (x: complex32) -> (r, θ: f16) { + return abs(x), phase(x) +} +polar_complex64 :: proc "contextless" (x: complex64) -> (r, θ: f32) { + return abs(x), phase(x) +} +polar_complex128 :: proc "contextless" (x: complex128) -> (r, θ: f64) { + return abs(x), phase(x) +} + + + + +nan_complex32 :: proc "contextless" () -> complex32 { + return complex(math.nan_f16(), math.nan_f16()) +} +nan_complex64 :: proc "contextless" () -> complex64 { + return complex(math.nan_f32(), math.nan_f32()) +} +nan_complex128 :: proc "contextless" () -> complex128 { + return complex(math.nan_f64(), math.nan_f64()) +} + + +inf_complex32 :: proc "contextless" () -> complex32 { + inf := math.inf_f16(1) + return complex(inf, inf) +} +inf_complex64 :: proc "contextless" () -> complex64 { + inf := math.inf_f32(1) + return complex(inf, inf) +} +inf_complex128 :: proc "contextless" () -> complex128 { + inf := math.inf_f64(1) + return complex(inf, inf) +} + + +is_inf_complex32 :: proc "contextless" (x: complex32) -> bool { + return math.is_inf(real(x), 0) || math.is_inf(imag(x), 0) +} +is_inf_complex64 :: proc "contextless" (x: complex64) -> bool { + return math.is_inf(real(x), 0) || math.is_inf(imag(x), 0) +} +is_inf_complex128 :: proc "contextless" (x: complex128) -> bool { + return math.is_inf(real(x), 0) || math.is_inf(imag(x), 0) +} + + +is_nan_complex32 :: proc "contextless" (x: complex32) -> bool { + if math.is_inf(real(x), 0) || math.is_inf(imag(x), 0) { + return false + } + return math.is_nan(real(x)) || math.is_nan(imag(x)) +} +is_nan_complex64 :: proc "contextless" (x: complex64) -> bool { + if math.is_inf(real(x), 0) || math.is_inf(imag(x), 0) { + return false + } + return math.is_nan(real(x)) || math.is_nan(imag(x)) +} +is_nan_complex128 
:: proc "contextless" (x: complex128) -> bool { + if math.is_inf(real(x), 0) || math.is_inf(imag(x), 0) { + return false + } + return math.is_nan(real(x)) || math.is_nan(imag(x)) +} diff --git a/core/math/cmplx/cmplx_invtrig.odin b/core/math/cmplx/cmplx_invtrig.odin new file mode 100644 index 000000000..a746a370f --- /dev/null +++ b/core/math/cmplx/cmplx_invtrig.odin @@ -0,0 +1,273 @@ +package math_cmplx + +import "core:builtin" +import "core:math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. 
Moshier +// moshier@na-net.ornl.gov + +acos :: proc{ + acos_complex32, + acos_complex64, + acos_complex128, +} +acosh :: proc{ + acosh_complex32, + acosh_complex64, + acosh_complex128, +} + +asin :: proc{ + asin_complex32, + asin_complex64, + asin_complex128, +} +asinh :: proc{ + asinh_complex32, + asinh_complex64, + asinh_complex128, +} + +atan :: proc{ + atan_complex32, + atan_complex64, + atan_complex128, +} + +atanh :: proc{ + atanh_complex32, + atanh_complex64, + atanh_complex128, +} + + +acos_complex32 :: proc "contextless" (x: complex32) -> complex32 { + w := asin(x) + return complex(math.PI/2 - real(w), -imag(w)) +} +acos_complex64 :: proc "contextless" (x: complex64) -> complex64 { + w := asin(x) + return complex(math.PI/2 - real(w), -imag(w)) +} +acos_complex128 :: proc "contextless" (x: complex128) -> complex128 { + w := asin(x) + return complex(math.PI/2 - real(w), -imag(w)) +} + + +acosh_complex32 :: proc "contextless" (x: complex32) -> complex32 { + if x == 0 { + return complex(0, math.copy_sign(math.PI/2, imag(x))) + } + w := acos(x) + if imag(w) <= 0 { + return complex(-imag(w), real(w)) + } + return complex(imag(w), -real(w)) +} +acosh_complex64 :: proc "contextless" (x: complex64) -> complex64 { + if x == 0 { + return complex(0, math.copy_sign(math.PI/2, imag(x))) + } + w := acos(x) + if imag(w) <= 0 { + return complex(-imag(w), real(w)) + } + return complex(imag(w), -real(w)) +} +acosh_complex128 :: proc "contextless" (x: complex128) -> complex128 { + if x == 0 { + return complex(0, math.copy_sign(math.PI/2, imag(x))) + } + w := acos(x) + if imag(w) <= 0 { + return complex(-imag(w), real(w)) + } + return complex(imag(w), -real(w)) +} + +asin_complex32 :: proc "contextless" (x: complex32) -> complex32 { + return complex32(asin_complex128(complex128(x))) +} +asin_complex64 :: proc "contextless" (x: complex64) -> complex64 { + return complex64(asin_complex128(complex128(x))) +} +asin_complex128 :: proc "contextless" (x: complex128) -> complex128 { + 
switch re, im := real(x), imag(x); { + case im == 0 && abs(re) <= 1: + return complex(math.asin(re), im) + case re == 0 && abs(im) <= 1: + return complex(re, math.asinh(im)) + case math.is_nan(im): + switch { + case re == 0: + return complex(re, math.nan_f64()) + case math.is_inf(re, 0): + return complex(math.nan_f64(), re) + case: + return nan_complex128() + } + case math.is_inf(im, 0): + switch { + case math.is_nan(re): + return x + case math.is_inf(re, 0): + return complex(math.copy_sign(math.PI/4, re), im) + case: + return complex(math.copy_sign(0, re), im) + } + case math.is_inf(re, 0): + return complex(math.copy_sign(math.PI/2, re), math.copy_sign(re, im)) + } + ct := complex(-imag(x), real(x)) // i * x + xx := x * x + x1 := complex(1-real(xx), -imag(xx)) // 1 - x*x + x2 := sqrt(x1) // x2 = sqrt(1 - x*x) + w := ln(ct + x2) + return complex(imag(w), -real(w)) // -i * w +} + +asinh_complex32 :: proc "contextless" (x: complex32) -> complex32 { + return complex32(asinh_complex128(complex128(x))) +} +asinh_complex64 :: proc "contextless" (x: complex64) -> complex64 { + return complex64(asinh_complex128(complex128(x))) +} +asinh_complex128 :: proc "contextless" (x: complex128) -> complex128 { + switch re, im := real(x), imag(x); { + case im == 0 && abs(re) <= 1: + return complex(math.asinh(re), im) + case re == 0 && abs(im) <= 1: + return complex(re, math.asin(im)) + case math.is_inf(re, 0): + switch { + case math.is_inf(im, 0): + return complex(re, math.copy_sign(math.PI/4, im)) + case math.is_nan(im): + return x + case: + return complex(re, math.copy_sign(0.0, im)) + } + case math.is_nan(re): + switch { + case im == 0: + return x + case math.is_inf(im, 0): + return complex(im, re) + case: + return nan_complex128() + } + case math.is_inf(im, 0): + return complex(math.copy_sign(im, re), math.copy_sign(math.PI/2, im)) + } + xx := x * x + x1 := complex(1+real(xx), imag(xx)) // 1 + x*x + return ln(x + sqrt(x1)) // log(x + sqrt(1 + x*x)) +} + + +atan_complex32 :: proc 
"contextless" (x: complex32) -> complex32 { + return complex32(atan_complex128(complex128(x))) +} +atan_complex64 :: proc "contextless" (x: complex64) -> complex64 { + return complex64(atan_complex128(complex128(x))) +} +atan_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // Complex circular arc tangent + // + // DESCRIPTION: + // + // If + // z = x + iy, + // + // then + // 1 ( 2x ) + // Re w = - arctan(-----------) + k PI + // 2 ( 2 2) + // (1 - x - y ) + // + // ( 2 2) + // 1 (x + (y+1) ) + // Im w = - log(------------) + // 4 ( 2 2) + // (x + (y-1) ) + // + // Where k is an arbitrary integer. + // + // catan(z) = -i catanh(iz). + // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // DEC -10,+10 5900 1.3e-16 7.8e-18 + // IEEE -10,+10 30000 2.3e-15 8.5e-17 + // The check catan( ctan(z) ) = z, with |x| and |y| < PI/2, + // had peak relative error 1.5e-16, rms relative error + // 2.9e-17. See also clog(). + + switch re, im := real(x), imag(x); { + case im == 0: + return complex(math.atan(re), im) + case re == 0 && abs(im) <= 1: + return complex(re, math.atanh(im)) + case math.is_inf(im, 0) || math.is_inf(re, 0): + if math.is_nan(re) { + return complex(math.nan_f64(), math.copy_sign(0, im)) + } + return complex(math.copy_sign(math.PI/2, re), math.copy_sign(0, im)) + case math.is_nan(re) || math.is_nan(im): + return nan_complex128() + } + x2 := real(x) * real(x) + a := 1 - x2 - imag(x)*imag(x) + if a == 0 { + return nan_complex128() + } + t := 0.5 * math.atan2(2*real(x), a) + w := _reduce_pi_f64(t) + + t = imag(x) - 1 + b := x2 + t*t + if b == 0 { + return nan_complex128() + } + t = imag(x) + 1 + c := (x2 + t*t) / b + return complex(w, 0.25*math.ln(c)) +} + +atanh_complex32 :: proc "contextless" (x: complex32) -> complex32 { + z := complex(-imag(x), real(x)) // z = i * x + z = atan(z) + return complex(imag(z), -real(z)) // z = -i * z +} +atanh_complex64 :: proc "contextless" (x: complex64) -> complex64 { + z := 
complex(-imag(x), real(x)) // z = i * x + z = atan(z) + return complex(imag(z), -real(z)) // z = -i * z +} +atanh_complex128 :: proc "contextless" (x: complex128) -> complex128 { + z := complex(-imag(x), real(x)) // z = i * x + z = atan(z) + return complex(imag(z), -real(z)) // z = -i * z +}
\ No newline at end of file diff --git a/core/math/cmplx/cmplx_trig.odin b/core/math/cmplx/cmplx_trig.odin new file mode 100644 index 000000000..7ca404fab --- /dev/null +++ b/core/math/cmplx/cmplx_trig.odin @@ -0,0 +1,409 @@ +package math_cmplx + +import "core:math" +import "core:math/bits" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +sin_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // Complex circular sine + // + // DESCRIPTION: + // + // If + // z = x + iy, + // + // then + // + // w = sin x cosh y + i cos x sinh y. + // + // csin(z) = -i csinh(iz). + // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // DEC -10,+10 8400 5.3e-17 1.3e-17 + // IEEE -10,+10 30000 3.8e-16 1.0e-16 + // Also tested by csin(casin(z)) = z. 
+ + switch re, im := real(x), imag(x); { + case im == 0 && (math.is_inf(re, 0) || math.is_nan(re)): + return complex(math.nan_f64(), im) + case math.is_inf(im, 0): + switch { + case re == 0: + return x + case math.is_inf(re, 0) || math.is_nan(re): + return complex(math.nan_f64(), im) + } + case re == 0 && math.is_nan(im): + return x + } + s, c := math.sincos(real(x)) + sh, ch := _sinhcosh_f64(imag(x)) + return complex(s*ch, c*sh) +} + +cos_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // Complex circular cosine + // + // DESCRIPTION: + // + // If + // z = x + iy, + // + // then + // + // w = cos x cosh y - i sin x sinh y. + // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // DEC -10,+10 8400 4.5e-17 1.3e-17 + // IEEE -10,+10 30000 3.8e-16 1.0e-16 + + switch re, im := real(x), imag(x); { + case im == 0 && (math.is_inf(re, 0) || math.is_nan(re)): + return complex(math.nan_f64(), -im*math.copy_sign(0, re)) + case math.is_inf(im, 0): + switch { + case re == 0: + return complex(math.inf_f64(1), -re*math.copy_sign(0, im)) + case math.is_inf(re, 0) || math.is_nan(re): + return complex(math.inf_f64(1), math.nan_f64()) + } + case re == 0 && math.is_nan(im): + return complex(math.nan_f64(), 0) + } + s, c := math.sincos(real(x)) + sh, ch := _sinhcosh_f64(imag(x)) + return complex(c*ch, -s*sh) +} + +sinh_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // Complex hyperbolic sine + // + // DESCRIPTION: + // + // csinh z = (cexp(z) - cexp(-z))/2 + // = sinh x * cos y + i cosh x * sin y . 
+ // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // IEEE -10,+10 30000 3.1e-16 8.2e-17 + + switch re, im := real(x), imag(x); { + case re == 0 && (math.is_inf(im, 0) || math.is_nan(im)): + return complex(re, math.nan_f64()) + case math.is_inf(re, 0): + switch { + case im == 0: + return complex(re, im) + case math.is_inf(im, 0) || math.is_nan(im): + return complex(re, math.nan_f64()) + } + case im == 0 && math.is_nan(re): + return complex(math.nan_f64(), im) + } + s, c := math.sincos(imag(x)) + sh, ch := _sinhcosh_f64(real(x)) + return complex(c*sh, s*ch) +} + +cosh_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // Complex hyperbolic cosine + // + // DESCRIPTION: + // + // ccosh(z) = cosh x cos y + i sinh x sin y . + // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // IEEE -10,+10 30000 2.9e-16 8.1e-17 + + switch re, im := real(x), imag(x); { + case re == 0 && (math.is_inf(im, 0) || math.is_nan(im)): + return complex(math.nan_f64(), re*math.copy_sign(0, im)) + case math.is_inf(re, 0): + switch { + case im == 0: + return complex(math.inf_f64(1), im*math.copy_sign(0, re)) + case math.is_inf(im, 0) || math.is_nan(im): + return complex(math.inf_f64(1), math.nan_f64()) + } + case im == 0 && math.is_nan(re): + return complex(math.nan_f64(), im) + } + s, c := math.sincos(imag(x)) + sh, ch := _sinhcosh_f64(real(x)) + return complex(c*ch, s*sh) +} + +tan_complex128 :: proc "contextless" (x: complex128) -> complex128 { + // Complex circular tangent + // + // DESCRIPTION: + // + // If + // z = x + iy, + // + // then + // + // sin 2x + i sinh 2y + // w = --------------------. + // cos 2x + cosh 2y + // + // On the real axis the denominator is zero at odd multiples + // of PI/2. The denominator is evaluated by its Taylor + // series near these points. + // + // ctan(z) = -i ctanh(iz). 
+ // + // ACCURACY: + // + // Relative error: + // arithmetic domain # trials peak rms + // DEC -10,+10 5200 7.1e-17 1.6e-17 + // IEEE -10,+10 30000 7.2e-16 1.2e-16 + // Also tested by ctan * ccot = 1 and catan(ctan(z)) = z. + + switch re, im := real(x), imag(x); { + case math.is_inf(im, 0): + switch { + case math.is_inf(re, 0) || math.is_nan(re): + return complex(math.copy_sign(0, re), math.copy_sign(1, im)) + } + return complex(math.copy_sign(0, math.sin(2*re)), math.copy_sign(1, im)) + case re == 0 && math.is_nan(im): + return x + } + d := math.cos(2*real(x)) + math.cosh(2*imag(x)) + if abs(d) < 0.25 { + d = _tan_series_f64(x) + } + if d == 0 { + return inf_complex128() + } + return complex(math.sin(2*real(x))/d, math.sinh(2*imag(x))/d) +} + +tanh_complex128 :: proc "contextless" (x: complex128) -> complex128 { + switch re, im := real(x), imag(x); { + case math.is_inf(re, 0): + switch { + case math.is_inf(im, 0) || math.is_nan(im): + return complex(math.copy_sign(1, re), math.copy_sign(0, im)) + } + return complex(math.copy_sign(1, re), math.copy_sign(0, math.sin(2*im))) + case im == 0 && math.is_nan(re): + return x + } + d := math.cosh(2*real(x)) + math.cos(2*imag(x)) + if d == 0 { + return inf_complex128() + } + return complex(math.sinh(2*real(x))/d, math.sin(2*imag(x))/d) +} + +cot_complex128 :: proc "contextless" (x: complex128) -> complex128 { + d := math.cosh(2*imag(x)) - math.cos(2*real(x)) + if abs(d) < 0.25 { + d = _tan_series_f64(x) + } + if d == 0 { + return inf_complex128() + } + return complex(math.sin(2*real(x))/d, -math.sinh(2*imag(x))/d) +} + + +@(private="file") +_sinhcosh_f64 :: proc "contextless" (x: f64) -> (sh, ch: f64) { + if abs(x) <= 0.5 { + return math.sinh(x), math.cosh(x) + } + e := math.exp(x) + ei := 0.5 / e + e *= 0.5 + return e - ei, e + ei +} + + +// taylor series of cosh(2y) - cos(2x) +@(private) +_tan_series_f64 :: proc "contextless" (z: complex128) -> f64 { + MACH_EPSILON :: 1.0 / (1 << 53) + + x := abs(2 * real(z)) + y := 
abs(2 * imag(z)) + x = _reduce_pi_f64(x) + x, y = x * x, y * y + x2, y2 := 1.0, 1.0 + f, rn, d := 1.0, 0.0, 0.0 + + for { + rn += 1 + f *= rn + rn += 1 + f *= rn + x2 *= x + y2 *= y + t := y2 + x2 + t /= f + d += t + + rn += 1 + f *= rn + rn += 1 + f *= rn + x2 *= x + y2 *= y + t = y2 - x2 + t /= f + d += t + if !(abs(t/d) > MACH_EPSILON) { // don't use <=, because of floating point nonsense and NaN + break + } + } + return d +} + +// _reduce_pi_f64 reduces the input argument x to the range (-PI/2, PI/2]. +// x must be greater than or equal to 0. For small arguments it +// uses Cody-Waite reduction in 3 f64 parts based on: +// "Elementary Function Evaluation: Algorithms and Implementation" +// Jean-Michel Muller, 1997. +// For very large arguments it uses Payne-Hanek range reduction based on: +// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" +@(private) +_reduce_pi_f64 :: proc "contextless" (x: f64) -> f64 #no_bounds_check { + x := x + + // REDUCE_THRESHOLD is the maximum value of x where the reduction using + // Cody-Waite reduction still gives accurate results. This threshold + // is set by t*PIn being representable as a f64 without error + // where t is given by t = floor(x * (1 / PI)) and PIn are the leading partial + // terms of PI. Since the leading terms, PI1 and PI2 below, have 30 and 32 + // trailing zero bits respectively, t should have less than 30 significant bits. + // t < 1<<30 -> floor(x*(1/PI)+0.5) < 1<<30 -> x < (1<<30-1) * PI - 0.5 + // So, conservatively we can take x < 1<<30. + REDUCE_THRESHOLD :: f64(1 << 30) + + if abs(x) < REDUCE_THRESHOLD { + // Use Cody-Waite reduction in three parts. + // PI1, PI2 and PI3 comprise an extended precision value of PI + // such that PI ~= PI1 + PI2 + PI3. The parts are chosen so + // that PI1 and PI2 have an approximately equal number of trailing + // zero bits. This ensures that t*PI1 and t*PI2 are exact for + // large integer values of t. 
The full precision PI3 ensures the + // approximation of PI is accurate to 102 bits to handle cancellation + // during subtraction. + PI1 :: 0h400921fb40000000 // 3.141592502593994 + PI2 :: 0h3e84442d00000000 // 1.5099578831723193e-07 + PI3 :: 0h3d08469898cc5170 // 1.0780605716316238e-14 + + t := x / math.PI + t += 0.5 + t = f64(i64(t)) // i64(t) = the multiple + return ((x - t*PI1) - t*PI2) - t*PI3 + } + // Must apply Payne-Hanek range reduction + MASK :: 0x7FF + SHIFT :: 64 - 11 - 1 + BIAS :: 1023 + FRAC_MASK :: 1<<SHIFT - 1 + + // Extract out the integer and exponent such that, + // x = ix * 2 ** exp. + ix := transmute(u64)(x) + exp := int(ix>>SHIFT&MASK) - BIAS - SHIFT + ix &= FRAC_MASK + ix |= 1 << SHIFT + + // bdpi is the binary digits of 1/PI as a u64 array, + // that is, 1/PI = SUM bdpi[i]*2^(-64*i). + // 19 64-bit digits give 1216 bits of precision + // to handle the largest possible f64 exponent. + @static bdpi := [?]u64{ + 0x0000000000000000, + 0x517cc1b727220a94, + 0xfe13abe8fa9a6ee0, + 0x6db14acc9e21c820, + 0xff28b1d5ef5de2b0, + 0xdb92371d2126e970, + 0x0324977504e8c90e, + 0x7f0ef58e5894d39f, + 0x74411afa975da242, + 0x74ce38135a2fbf20, + 0x9cc8eb1cc1a99cfa, + 0x4e422fc5defc941d, + 0x8ffc4bffef02cc07, + 0xf79788c5ad05368f, + 0xb69b3f6793e584db, + 0xa7a31fb34f2ff516, + 0xba93dd63f5f2f8bd, + 0x9e839cfbc5294975, + 0x35fdafd88fc6ae84, + 0x2b0198237e3db5d5, + } + + // Use the exponent to extract the 3 appropriate u64 digits from bdpi, + // B ~ (z0, z1, z2), such that the product leading digit has the exponent -64. + // Note, exp >= 50 since x >= REDUCE_THRESHOLD and exp < 971 for maximum f64. + digit, bitshift := uint(exp+64)/64, uint(exp+64)%64 + z0 := (bdpi[digit] << bitshift) | (bdpi[digit+1] >> (64 - bitshift)) + z1 := (bdpi[digit+1] << bitshift) | (bdpi[digit+2] >> (64 - bitshift)) + z2 := (bdpi[digit+2] << bitshift) | (bdpi[digit+3] >> (64 - bitshift)) + + // Multiply mantissa by the digits and extract the upper two digits (hi, lo). 
+ z2hi, _ := bits.mul(z2, ix) + z1hi, z1lo := bits.mul(z1, ix) + z0lo := z0 * ix + lo, c := bits.add(z1lo, z2hi, 0) + hi, _ := bits.add(z0lo, z1hi, c) + + // Find the magnitude of the fraction. + lz := uint(bits.leading_zeros(hi)) + e := u64(BIAS - (lz + 1)) + + // Clear implicit mantissa bit and shift into place. + hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1))) + hi >>= 64 - SHIFT + + // Include the exponent and convert to a float. + hi |= e << SHIFT + x = transmute(f64)(hi) + + // map to (-PI/2, PI/2] + if x > 0.5 { + x -= 1 + } + return math.PI * x +} + diff --git a/core/math/ease/ease.odin b/core/math/ease/ease.odin index d5cb85dd8..0e6569bca 100644 --- a/core/math/ease/ease.odin +++ b/core/math/ease/ease.odin @@ -450,7 +450,7 @@ flux_tween_init :: proc(tween: ^Flux_Tween($T), duration: time.Duration) where i flux_update :: proc(flux: ^Flux_Map($T), dt: f64) where intrinsics.type_is_float(T) { clear(&flux.keys_to_be_deleted) - for key, tween in &flux.values { + for key, &tween in flux.values { delay_remainder := f64(0) // Update delay if necessary. 
diff --git a/core/math/math.odin b/core/math/math.odin index 05177378f..6f7a36bab 100644 --- a/core/math/math.odin +++ b/core/math/math.odin @@ -2158,6 +2158,80 @@ signbit :: proc{ } +@(require_results) +hypot_f16 :: proc "contextless" (x, y: f16) -> (r: f16) { + p, q := abs(x), abs(y) + switch { + case is_inf(p, 1) || is_inf(q, 1): + return inf_f16(1) + case is_nan(p) || is_nan(q): + return nan_f16() + } + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * sqrt(1+q*q) +} +@(require_results) +hypot_f32 :: proc "contextless" (x, y: f32) -> (r: f32) { + p, q := abs(x), abs(y) + switch { + case is_inf(p, 1) || is_inf(q, 1): + return inf_f32(1) + case is_nan(p) || is_nan(q): + return nan_f32() + } + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * sqrt(1+q*q) +} +@(require_results) +hypot_f64 :: proc "contextless" (x, y: f64) -> (r: f64) { + p, q := abs(x), abs(y) + switch { + case is_inf(p, 1) || is_inf(q, 1): + return inf_f64(1) + case is_nan(p) || is_nan(q): + return nan_f64() + } + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * sqrt(1+q*q) +} +@(require_results) hypot_f16le :: proc "contextless" (x, y: f16le) -> (r: f16le) { return f16le(hypot_f16(f16(x), f16(y))) } +@(require_results) hypot_f16be :: proc "contextless" (x, y: f16be) -> (r: f16be) { return f16be(hypot_f16(f16(x), f16(y))) } +@(require_results) hypot_f32le :: proc "contextless" (x, y: f32le) -> (r: f32le) { return f32le(hypot_f32(f32(x), f32(y))) } +@(require_results) hypot_f32be :: proc "contextless" (x, y: f32be) -> (r: f32be) { return f32be(hypot_f32(f32(x), f32(y))) } +@(require_results) hypot_f64le :: proc "contextless" (x, y: f64le) -> (r: f64le) { return f64le(hypot_f64(f64(x), f64(y))) } +@(require_results) hypot_f64be :: proc "contextless" (x, y: f64be) -> (r: f64be) { return f64be(hypot_f64(f64(x), f64(y))) } + +// hypot returns Sqrt(p*p + q*q), taking care to avoid unnecessary overflow and 
underflow. +// +// Special cases: +// hypot(±Inf, q) = +Inf +// hypot(p, ±Inf) = +Inf +// hypot(NaN, q) = NaN +// hypot(p, NaN) = NaN +hypot :: proc{ + hypot_f16, hypot_f16le, hypot_f16be, + hypot_f32, hypot_f32le, hypot_f32be, + hypot_f64, hypot_f64le, hypot_f64be, +} + F16_DIG :: 3 F16_EPSILON :: 0.00097656 F16_GUARD :: 0 diff --git a/core/math/math_basic.odin b/core/math/math_basic.odin index 785c43b10..95e0a93ec 100644 --- a/core/math/math_basic.odin +++ b/core/math/math_basic.odin @@ -3,45 +3,111 @@ package math import "core:intrinsics" -@(default_calling_convention="none") +@(default_calling_convention="none", private="file") foreign _ { @(link_name="llvm.sin.f16", require_results) - sin_f16 :: proc(θ: f16) -> f16 --- + _sin_f16 :: proc(θ: f16) -> f16 --- @(link_name="llvm.sin.f32", require_results) - sin_f32 :: proc(θ: f32) -> f32 --- + _sin_f32 :: proc(θ: f32) -> f32 --- @(link_name="llvm.sin.f64", require_results) - sin_f64 :: proc(θ: f64) -> f64 --- + _sin_f64 :: proc(θ: f64) -> f64 --- @(link_name="llvm.cos.f16", require_results) - cos_f16 :: proc(θ: f16) -> f16 --- + _cos_f16 :: proc(θ: f16) -> f16 --- @(link_name="llvm.cos.f32", require_results) - cos_f32 :: proc(θ: f32) -> f32 --- + _cos_f32 :: proc(θ: f32) -> f32 --- @(link_name="llvm.cos.f64", require_results) - cos_f64 :: proc(θ: f64) -> f64 --- + _cos_f64 :: proc(θ: f64) -> f64 --- @(link_name="llvm.pow.f16", require_results) - pow_f16 :: proc(x, power: f16) -> f16 --- + _pow_f16 :: proc(x, power: f16) -> f16 --- @(link_name="llvm.pow.f32", require_results) - pow_f32 :: proc(x, power: f32) -> f32 --- + _pow_f32 :: proc(x, power: f32) -> f32 --- @(link_name="llvm.pow.f64", require_results) - pow_f64 :: proc(x, power: f64) -> f64 --- + _pow_f64 :: proc(x, power: f64) -> f64 --- @(link_name="llvm.fmuladd.f16", require_results) - fmuladd_f16 :: proc(a, b, c: f16) -> f16 --- + _fmuladd_f16 :: proc(a, b, c: f16) -> f16 --- @(link_name="llvm.fmuladd.f32", require_results) - fmuladd_f32 :: proc(a, b, c: 
f32) -> f32 --- + _fmuladd_f32 :: proc(a, b, c: f32) -> f32 --- @(link_name="llvm.fmuladd.f64", require_results) - fmuladd_f64 :: proc(a, b, c: f64) -> f64 --- + _fmuladd_f64 :: proc(a, b, c: f64) -> f64 --- @(link_name="llvm.exp.f16", require_results) - exp_f16 :: proc(x: f16) -> f16 --- + _exp_f16 :: proc(x: f16) -> f16 --- @(link_name="llvm.exp.f32", require_results) - exp_f32 :: proc(x: f32) -> f32 --- + _exp_f32 :: proc(x: f32) -> f32 --- @(link_name="llvm.exp.f64", require_results) - exp_f64 :: proc(x: f64) -> f64 --- + _exp_f64 :: proc(x: f64) -> f64 --- } @(require_results) +sin_f16 :: proc "contextless" (θ: f16) -> f16 { + return _sin_f16(θ) +} +@(require_results) +sin_f32 :: proc "contextless" (θ: f32) -> f32 { + return _sin_f32(θ) +} +@(require_results) +sin_f64 :: proc "contextless" (θ: f64) -> f64 { + return _sin_f64(θ) +} + +@(require_results) +cos_f16 :: proc "contextless" (θ: f16) -> f16 { + return _cos_f16(θ) +} +@(require_results) +cos_f32 :: proc "contextless" (θ: f32) -> f32 { + return _cos_f32(θ) +} +@(require_results) +cos_f64 :: proc "contextless" (θ: f64) -> f64 { + return _cos_f64(θ) +} + +@(require_results) +pow_f16 :: proc "contextless" (x, power: f16) -> f16 { + return _pow_f16(x, power) +} +@(require_results) +pow_f32 :: proc "contextless" (x, power: f32) -> f32 { + return _pow_f32(x, power) +} +@(require_results) +pow_f64 :: proc "contextless" (x, power: f64) -> f64 { + return _pow_f64(x, power) +} + +@(require_results) +fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 { + return _fmuladd_f16(a, b, c) +} +@(require_results) +fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 { + return _fmuladd_f32(a, b, c) +} +@(require_results) +fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 { + return _fmuladd_f64(a, b, c) +} + +@(require_results) +exp_f16 :: proc "contextless" (x: f16) -> f16 { + return _exp_f16(x) +} +@(require_results) +exp_f32 :: proc "contextless" (x: f32) -> f32 { + return _exp_f32(x) +} 
+@(require_results) +exp_f64 :: proc "contextless" (x: f64) -> f64 { + return _exp_f64(x) +} + + +@(require_results) sqrt_f16 :: proc "contextless" (x: f16) -> f16 { return intrinsics.sqrt(x) } diff --git a/core/math/math_sincos.odin b/core/math/math_sincos.odin new file mode 100644 index 000000000..578876ac5 --- /dev/null +++ b/core/math/math_sincos.odin @@ -0,0 +1,308 @@ +package math + +import "core:math/bits" + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/sin.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The Go code this port is based on is a simplified version of the original C. +// +// sin.c +// +// Circular sine +// +// SYNOPSIS: +// +// double x, y, sin(); +// y = sin( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the sine is approximated by +// x + x**3 P(x**2). +// Between pi/4 and pi/2 the cosine is represented as +// 1 - x**2 Q(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC 0, 10 150000 3.0e-17 7.8e-18 +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// +// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss +// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may +// be meaningless for x > 2**49 = 5.6e14. +// +// cos.c +// +// Circular cosine +// +// SYNOPSIS: +// +// double x, y, cos(); +// y = cos( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the cosine is approximated by +// 1 - x**2 Q(x**2).
+// Between pi/4 and pi/2 the sine is represented as +// x + x**3 P(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// DEC 0,+1.07e9 17000 3.0e-17 7.2e-18 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +sincos :: proc{ + sincos_f16, sincos_f16le, sincos_f16be, + sincos_f32, sincos_f32le, sincos_f32be, + sincos_f64, sincos_f64le, sincos_f64be, +} + +sincos_f16 :: proc "contextless" (x: f16) -> (sin, cos: f16) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f16(s), f16(c) +} +sincos_f16le :: proc "contextless" (x: f16le) -> (sin, cos: f16le) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f16le(s), f16le(c) +} +sincos_f16be :: proc "contextless" (x: f16be) -> (sin, cos: f16be) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f16be(s), f16be(c) +} + +sincos_f32 :: proc "contextless" (x: f32) -> (sin, cos: f32) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f32(s), f32(c) +} +sincos_f32le :: proc "contextless" (x: f32le) -> (sin, cos: f32le) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f32le(s), f32le(c) +} +sincos_f32be :: proc "contextless" (x: f32be) -> (sin, cos: f32be) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return 
f32be(s), f32be(c) +} + +sincos_f64le :: proc "contextless" (x: f64le) -> (sin, cos: f64le) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f64le(s), f64le(c) +} +sincos_f64be :: proc "contextless" (x: f64be) -> (sin, cos: f64be) #no_bounds_check { + s, c := sincos_f64(f64(x)) + return f64be(s), f64be(c) +} + +sincos_f64 :: proc "contextless" (x: f64) -> (sin, cos: f64) #no_bounds_check { + x := x + + PI4A :: 0h3fe921fb40000000 // 7.85398125648498535156e-1 PI/4 split into three parts + PI4B :: 0h3e64442d00000000 // 3.77489470793079817668e-8 + PI4C :: 0h3ce8469898cc5170 // 2.69515142907905952645e-15 + + // special cases + switch { + case x == 0: + return x, 1 // return ±0.0, 1.0 + case is_nan(x) || is_inf(x, 0): + return nan_f64(), nan_f64() + } + + // make argument positive + sin_sign, cos_sign := false, false + if x < 0 { + x = -x + sin_sign = true + } + + j: u64 + y, z: f64 + if x >= REDUCE_THRESHOLD { + j, z = _trig_reduce_f64(x) + } else { + j = u64(x * (4 / PI)) // integer part of x/(PI/4), as integer for tests on the phase angle + y = f64(j) // integer part of x/(PI/4), as float + + if j&1 == 1 { // map zeros to origin + j += 1 + y += 1 + } + j &= 7 // octant modulo TAU radians (360 degrees) + z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + } + if j > 3 { // reflect in x axis + j -= 4 + sin_sign, cos_sign = !sin_sign, !cos_sign + } + if j > 1 { + cos_sign = !cos_sign + } + + zz := z * z + + cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + + if j == 1 || j == 2 { + sin, cos = cos, sin + } + if cos_sign { + cos = -cos + } + if sin_sign { + sin = -sin + } + return +} + +// sin coefficients +@(private="file") +_sin := [?]f64{ + 0h3de5d8fd1fd19ccd, // 1.58962301576546568060e-10 + 0hbe5ae5e5a9291f5d, // -2.50507477628578072866e-8 + 0h3ec71de3567d48a1, // 2.75573136213857245213e-6 
+ 0hbf2a01a019bfdf03, // -1.98412698295895385996e-4 + 0h3f8111111110f7d0, // 8.33333333332211858878e-3 + 0hbfc5555555555548, // -1.66666666666666307295e-1 +} + +// cos coefficients +@(private="file") +_cos := [?]f64{ + 0hbda8fa49a0861a9b, // -1.13585365213876817300e-11, + 0h3e21ee9d7b4e3f05, // 2.08757008419747316778e-9, + 0hbe927e4f7eac4bc6, // -2.75573141792967388112e-7, + 0h3efa01a019c844f5, // 2.48015872888517045348e-5, + 0hbf56c16c16c14f91, // -1.38888888888730564116e-3, + 0h3fa555555555554b, // 4.16666666666665929218e-2, +} + +// REDUCE_THRESHOLD is the maximum value of x where the reduction using Pi/4 +// in 3 f64 parts still gives accurate results. This threshold +// is set by y*C being representable as a f64 without error +// where y is given by y = floor(x * (4 / Pi)) and C is the leading partial +// terms of Pi/4. Since the leading terms (PI4A and PI4B in sincos_f64) have 30 +// and 32 trailing zero bits, y should have less than 30 significant bits. +// +// y < 1<<30 -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4 +// +// So, conservatively we can take x < 1<<29. +// Above this threshold Payne-Hanek range reduction must be used. +@(private="file") +REDUCE_THRESHOLD :: 1 << 29 + +// _trig_reduce_f64 implements Payne-Hanek range reduction by Pi/4 +// for x > 0. It returns the integer part mod 8 (j) and +// the fractional part (z) of x / (Pi/4). +// The implementation is based on: +// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" +// K. C. Ng et al, March 24, 1992 +// The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic. +_trig_reduce_f64 :: proc "contextless" (x: f64) -> (j: u64, z: f64) #no_bounds_check { + // bd_pi4 is the binary digits of 4/pi as a u64 array, + // that is, 4/pi = Sum bd_pi4[i]*2^(-64*i) + // 19 64-bit digits and the leading one bit give 1217 bits + // of precision to handle the largest possible f64 exponent.
+ @static bd_pi4 := [?]u64{ + 0x0000000000000001, + 0x45f306dc9c882a53, + 0xf84eafa3ea69bb81, + 0xb6c52b3278872083, + 0xfca2c757bd778ac3, + 0x6e48dc74849ba5c0, + 0x0c925dd413a32439, + 0xfc3bd63962534e7d, + 0xd1046bea5d768909, + 0xd338e04d68befc82, + 0x7323ac7306a673e9, + 0x3908bf177bf25076, + 0x3ff12fffbc0b301f, + 0xde5e2316b414da3e, + 0xda6cfd9e4f96136e, + 0x9e8c7ecd3cbfd45a, + 0xea4f758fd7cbe2f6, + 0x7a0e73ef14a525d4, + 0xd7f6bf623f1aba10, + 0xac06608df8f6d757, + } + + PI4 :: PI / 4 + if x < PI4 { + return 0, x + } + + MASK :: 0x7FF + SHIFT :: 64 - 11 - 1 + BIAS :: 1023 + + // Extract out the integer and exponent such that, + // x = ix * 2 ** exp. + ix := transmute(u64)x + exp := int(ix>>SHIFT&MASK) - BIAS - SHIFT + ix &~= MASK << SHIFT + ix |= 1 << SHIFT + // Use the exponent to extract the 3 appropriate u64 digits from bd_pi4, + // B ~ (z0, z1, z2), such that the product leading digit has the exponent -61. + // Note, exp >= -53 since x >= PI4 and exp < 971 for maximum f64. + digit, bitshift := uint(exp+61)/64, uint(exp+61)%64 + z0 := (bd_pi4[digit] << bitshift) | (bd_pi4[digit+1] >> (64 - bitshift)) + z1 := (bd_pi4[digit+1] << bitshift) | (bd_pi4[digit+2] >> (64 - bitshift)) + z2 := (bd_pi4[digit+2] << bitshift) | (bd_pi4[digit+3] >> (64 - bitshift)) + // Multiply mantissa by the digits and extract the upper two digits (hi, lo). + z2hi, _ := bits.mul(z2, ix) + z1hi, z1lo := bits.mul(z1, ix) + z0lo := z0 * ix + lo, c := bits.add(z1lo, z2hi, 0) + hi, _ := bits.add(z0lo, z1hi, c) + // The top 3 bits are j. + j = hi >> 61 + // Extract the fraction and find its magnitude. + hi = hi<<3 | lo>>61 + lz := uint(bits.leading_zeros(hi)) + e := u64(BIAS - (lz + 1)) + // Clear implicit mantissa bit and shift into place. + hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1))) + hi >>= 64 - SHIFT + // Include the exponent and convert to a float. + hi |= e << SHIFT + z = transmute(f64)hi + // Map zeros to origin. 
+ if j&1 == 1 { + j += 1 + j &= 7 + z -= 1 + } + // Multiply the fractional part by pi/4. + return j, z * PI4 +} |