diff options
Diffstat (limited to 'core/math/math_fmuladd.odin')
| -rw-r--r-- | core/math/math_fmuladd.odin | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/core/math/math_fmuladd.odin b/core/math/math_fmuladd.odin new file mode 100644 index 000000000..632502454 --- /dev/null +++ b/core/math/math_fmuladd.odin @@ -0,0 +1,218 @@ +package math + +import "base:intrinsics" +_ :: intrinsics + +@(require_results) +fmuladd_f16 :: proc "contextless" (a, b, c: f16) -> f16 { + when IS_WASM { + return f16(fmuladd_f64(f64(a), f64(b), f64(c))) + } else { + foreign _ { + @(link_name="llvm.fmuladd.f16", require_results) + _fmuladd_f16 :: proc "none" (a, b, c: f16) -> f16 --- + } + + return _fmuladd_f16(a, b, c) + } +} +@(require_results) +fmuladd_f32 :: proc "contextless" (a, b, c: f32) -> f32 { + when IS_WASM { + return f32(fmuladd_f64(f64(a), f64(b), f64(c))) + } else { + foreign _ { + @(link_name="llvm.fmuladd.f32", require_results) + _fmuladd_f32 :: proc "none" (a, b, c: f32) -> f32 --- + } + + return _fmuladd_f32(a, b, c) + } +} +@(require_results) +fmuladd_f64 :: proc "contextless" (a, b, c: f64) -> f64 { + when IS_WASM { + return #force_inline fmuladd_slow_f64(a, b, c) + } else { + foreign _ { + @(link_name="llvm.fmuladd.f64", require_results) + _fmuladd_f64 :: proc "none" (a, b, c: f64) -> f64 --- + } + + return _fmuladd_f64(a, b, c) + } +} + + +@(require_results) +fmuladd_slow_f64 :: proc "contextless" (x, y, z: f64) -> f64 { + @(require_results) + split :: proc "contextless" (b: u64) -> (sign: u32, exp: i32, mantissa: u64) { + MASK :: 0x7FF + FRAC_MASK :: 1<<52 - 1 + + sign = u32(b >> 63) + exp = i32(b>>52) & MASK + mantissa = b & FRAC_MASK + + if exp == 0 { + shift := uint(intrinsics.count_leading_zeros(mantissa) - 11) + mantissa <<= shift + exp = 1 - i32(shift) + } else { + mantissa |= 1<<52 + } + return + } + + @(require_results) + mul_u64 :: proc "contextless" (x, y: u64) -> (hi, lo: u64) { + prod_wide := u128(x) * u128(y) + hi, lo = u64(prod_wide>>64), u64(prod_wide) + return + } + + @(require_results) + add_u64 :: proc "contextless" (x, y, carry: u64) -> (sum, carry_out: u64) { + tmp_carry, tmp_carry2: bool + sum, tmp_carry = intrinsics.overflow_add(x, y) + sum, tmp_carry2 = intrinsics.overflow_add(sum, carry) + carry_out = u64(tmp_carry | tmp_carry2) + return + } + + @(require_results) + sub_u64 :: proc "contextless" (x, y, borrow: u64) -> (diff, borrow_out: u64) { + tmp_borrow, tmp_borrow2: bool + diff, tmp_borrow = intrinsics.overflow_sub(x, y) + diff, tmp_borrow2 = intrinsics.overflow_sub(diff, borrow) + borrow_out = u64(tmp_borrow | tmp_borrow2) + return + } + + @(require_results) + nonzero :: proc "contextless" (x: u64) -> u64 { + return 1 if x != 0 else 0 + } + + @(require_results) + zero :: proc "contextless" (x: u64) -> u64 { + return 1 if x == 0 else 0 + } + + @(require_results) + shl :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) { + r1 = u1<<n | u2>>(64-n) | u2<<(n-64) + r2 = u2<<n + return + } + @(require_results) + shr :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) { + r2 = u2>>n | u1<<(64-n) | u1>>(n-64) + r1 = u1>>n + return + } + + @(require_results) + lz :: proc "contextless" (u1, u2: u64) -> (l: i32) { + l = i32(intrinsics.count_leading_zeros(u1)) + if l == 64 { + l += i32(intrinsics.count_leading_zeros(u2)) + } + return l + } + + @(require_results) + shrcompress :: proc "contextless" (u1, u2: u64, n: uint) -> (r1, r2: u64) { + switch { + case n == 0: + return u1, u2 + case n == 64: + return 0, u1 | nonzero(u2) + case n >= 128: + return 0, nonzero(u1 | u2) + case n < 64: + r1, r2 = shr(u1, u2, n) + r2 |= nonzero(u2 & (1<<n - 1)) + case n < 128: + r1, r2 = shr(u1, u2, n) + r2 |= nonzero(u1&(1<<(n-64)-1) | u2) + } + return + } + + + + + UVINF :: 0x7ff0_0000_0000_0000 + BIAS :: 1023 + + bx, by, bz := transmute(u64)x, transmute(u64)y, transmute(u64)z + + switch { + case x == 0, y == 0, z == 0, + bx&UVINF == UVINF, by&UVINF == UVINF: + return x*y + z + } + + if bz&UVINF == UVINF { + return z + } + + xs, xe, xm := split(bx) + ys, ye, ym := split(by) + zs, ze, zm := split(bz) + + pe := xe + ye - BIAS + 1 + + pm1, pm2 := mul_u64(xm<<10, ym<<11) + zm1, zm2 := zm<<10, u64(0) + ps := xs ~ ys // product sign + + is_62_zero := uint((~pm1 >> 62) & 1) + pm1, pm2 = shl(pm1, pm2, is_62_zero) + pe -= i32(is_62_zero) + + if pe < ze || pe == ze && pm1 < zm1 { + // Swap addition operands so |p| >= |z| + ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2 + } + + if ps != zs && pe == ze && pm1 == zm1 && pm2 == zm2 { + return 0 + } + + zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze)) + + // Compute resulting significands, normalizing if necessary. + m, c: u64 + if ps == zs { + // Adding (pm1:pm2) + (zm1:zm2) + pm2, c = add_u64(pm2, zm2, 0) + pm1, _ = add_u64(pm1, zm1, c) + pe -= i32(~pm1 >> 63) + pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63)) + } else { + // Subtracting (pm1:pm2) - (zm1:zm2) + pm2, c = sub_u64(pm2, zm2, 0) + pm1, _ = sub_u64(pm1, zm1, c) + nz := lz(pm1, pm2) + pe -= nz + m, pm2 = shl(pm1, pm2, uint(nz-1)) + m |= nonzero(pm2) + } + + // Round and break ties to even + if pe > 1022+BIAS || pe == 1022+BIAS && (m+1<<9)>>63 == 1 { + // rounded value overflows exponent range + return transmute(f64)(u64(ps)<<63 | UVINF) + } + if pe < 0 { + n := uint(-pe) + m = m>>n | nonzero(m&(1<<n-1)) + pe = 0 + } + m = ((m + 1<<9) >> 10) & ~zero((m&(1<<10-1))~1<<9) + pe &= -i32(nonzero(m)) + return transmute(f64)(u64(ps)<<63 + u64(pe)<<52 + m) +}
\ No newline at end of file |