diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs index 56ea0b72..0683d839 100644 --- a/crates/libm-test/src/f8_impl.rs +++ b/crates/libm-test/src/f8_impl.rs @@ -78,6 +78,10 @@ impl Float for f8 { libm::generic::copysign(self, other) } + fn fma(self, _y: Self, _z: Self) -> Self { + unimplemented!() + } + fn normalize(_significand: Self::Int) -> (i32, Self::Int) { unimplemented!() } diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 63d9927a..64a775ba 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -130,8 +130,7 @@ "copysign": { "sources": [ "src/math/copysign.rs", - "src/math/generic/copysign.rs", - "src/math/support/float_traits.rs" + "src/math/generic/copysign.rs" ], "type": "f64" }, @@ -343,22 +342,19 @@ }, "fma": { "sources": [ - "src/math/fma.rs", - "src/math/generic/fma.rs" + "src/math/fma.rs" ], "type": "f64" }, "fmaf": { "sources": [ - "src/math/fmaf.rs", - "src/math/generic/fma.rs" + "src/math/fma_wide.rs" ], "type": "f32" }, "fmaf128": { "sources": [ - "src/math/fmaf128.rs", - "src/math/generic/fma.rs" + "src/math/fma.rs" ], "type": "f128" }, diff --git a/etc/update-api-list.py b/etc/update-api-list.py index c0b6e41d..67d1b050 100755 --- a/etc/update-api-list.py +++ b/etc/update-api-list.py @@ -24,7 +24,7 @@ DIRECTORIES = [".github", "ci", "crates", "etc", "src"] # These files do not trigger a retest. -IGNORED_SOURCES = ["src/libm_helper.rs"] +IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"] IndexTy: TypeAlias = dict[str, dict[str, Any]] """Type of the `index` item in rustdoc's JSON output""" diff --git a/src/math/cbrt.rs b/src/math/cbrt.rs index 8560d37a..9d3311cd 100644 --- a/src/math/cbrt.rs +++ b/src/math/cbrt.rs @@ -103,11 +103,11 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { * and rr an approximation of 1/zz. We now perform another iteration of * Newton-Raphson, this time with a linear approximation only. */ y2 = y * y; - let mut y2l: f64 = fmaf64(y, y, -y2); + let mut y2l: f64 = y.fma(y, -y2); /* y2 + y2l = y^2 exactly */ let mut y3: f64 = y2 * y; - let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l; + let mut y3l: f64 = y.fma(y2, -y3) + y * y2l; /* y3 + y3l approximates y^3 with about 106 bits of accuracy */ h = ((y3 - zz) + y3l) * rr; @@ -132,9 +132,9 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { cold_path(); y2 = y1 * y1; - y2l = fmaf64(y1, y1, -y2); + y2l = y1.fma(y1, -y2); y3 = y2 * y1; - y3l = fmaf64(y1, y2, -y3) + y1 * y2l; + y3l = y1.fma(y2, -y3) + y1 * y2l; h = ((y3 - zz) + y3l) * rr; dy = h * (y1 * u0); y = y1 - dy; @@ -198,18 +198,6 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult { FpResult::ok(f64::from_bits(cvt3)) } -fn fmaf64(x: f64, y: f64, z: f64) -> f64 { - #[cfg(intrinsics_enabled)] - { - return unsafe { core::intrinsics::fmaf64(x, y, z) }; - } - - #[cfg(not(intrinsics_enabled))] - { - return super::fma(x, y, z); - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/math/fma.rs b/src/math/fma.rs index 69cc3eb6..a54984c9 100644 --- a/src/math/fma.rs +++ b/src/math/fma.rs @@ -1,14 +1,364 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ + +use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; +use super::{CastFrom, CastInto, Float, Int, MinInt}; + /// Fused multiply add (f64) /// /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { - return super::generic::fma(x, y, z); + fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply-add that works when there is not a larger float size available. Computes +/// `(x * y) + z`. +pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult +where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, +{ + let one = IntTy::::ONE; + let zero = IntTy::::ZERO; + + // Normalize such that the top of the mantissa is zero and we have a guard bit. + let nx = Norm::from_float(x); + let ny = Norm::from_float(y); + let nz = Norm::from_float(z); + + if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() { + // Value will overflow, defer to non-fused operations. + return FpResult::ok(x * y + z); + } + + if nz.is_zero_nan_inf() { + if nz.is_zero() { + // Empty add component means we only need to multiply. + return FpResult::ok(x * y); + } + // `z` is NaN or infinity, which sets the result. + return FpResult::ok(z); + } + + // multiply: r = x * y + let zhi: F::Int; + let zlo: F::Int; + let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); + + // Exponent result of multiplication + let mut e: i32 = nx.e + ny.e; + // Needed shift to align `z` to the multiplication result + let mut d: i32 = nz.e - e; + let sbits = F::BITS as i32; + + // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz) + if d > 0 { + // The magnitude of `z` is larger than `x * y` + if d < sbits { + // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift + // it into `(zhi, zlo)`. No exponent adjustment necessary. + zlo = nz.m << d; + zhi = nz.m >> (sbits - d); + } else { + // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts + // as a shift by `sbits`). + zlo = zero; + zhi = nz.m; + d -= sbits; + + // `z`'s exponent is large enough that it now needs to be taken into account. + e = nz.e - sbits; + + if d == 0 { + // Exactly `sbits`, nothing to do + } else if d < sbits { + // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y` + rlo = (rhi << (sbits - d)) | (rlo >> d); + // Set the sticky bit + rlo |= IntTy::::from((rlo << (sbits - d)) != zero); + rhi = rhi >> d; + } else { + // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set + // the sticky bit. + rlo = one; + rhi = zero; + } + } + } else { + // `z`'s magnitude once shifted fits entirely within `zlo` + zhi = zero; + d = -d; + if d == 0 { + // No shift needed + zlo = nz.m; + } else if d < sbits { + // Shift s.t. 
`nz.m` fits into `zlo` + let sticky = IntTy::::from((nz.m << (sbits - d)) != zero); + zlo = (nz.m >> d) | sticky; + } else { + // Would be entirely shifted out, only set the sticky bit + zlo = one; + } + } + + /* addition */ + + let mut neg = nx.neg ^ ny.neg; + let samesign: bool = !neg ^ nz.neg; + let mut rhi_nonzero = true; + + if samesign { + // r += z + rlo = rlo.wrapping_add(zlo); + rhi += zhi + IntTy::::from(rlo < zlo); + } else { + // r -= z + let (res, borrow) = rlo.overflowing_sub(zlo); + rlo = res; + rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::::from(borrow))); + if (rhi >> (F::BITS - 1)) != zero { + rlo = rlo.signed().wrapping_neg().unsigned(); + rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::::from(rlo != zero); + neg = !neg; + } + rhi_nonzero = rhi != zero; + } + + /* Construct result */ + + // Shift result into `rhi`, left-aligned. Last bit is sticky + if rhi_nonzero { + // `d` > 0, need to shift both `rhi` and `rlo` into result + e += sbits; + d = rhi.leading_zeros() as i32 - 1; + rhi = (rhi << d) | (rlo >> (sbits - d)); + // Update sticky + rhi |= IntTy::::from((rlo << d) != zero); + } else if rlo != zero { + // `rhi` is zero, `rlo` is the entire result and needs to be shifted + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + // Shift and set sticky + rhi = (rlo >> 1) | (rlo & one); + } else { + rhi = rlo << d; + } + } else { + // exact +/- 0.0 + return FpResult::ok(x * y + z); + } + + e -= d; + + // Use int->float conversion to populate the significand. + // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1] + let mut i: F::SignedInt = rhi.signed(); + + if neg { + i = -i; + } + + // `|r|` is in `[0x1p62,0x1p63]` for `f64` + let mut r: F = F::cast_from_lossy(i); + + /* Account for subnormal and rounding */ + + // Unbiased exponent for the maximum value of `r` + let max_pow = F::BITS - 1 + F::EXP_BIAS; + + let mut status = Status::OK; + + if e < -(max_pow as i32 - 2) { + // Result is subnormal before rounding + if e == -(max_pow as i32 - 1) { + let mut c = F::from_parts(false, max_pow, zero); + if neg { + c = -c; + } + + if r == c { + // Min normal after rounding, + status.set_underflow(true); + r = F::MIN_POSITIVE_NORMAL.copysign(r); + return FpResult::new(r, status); + } + + if (rhi << (F::SIG_BITS + 1)) != zero { + // Account for truncated bits. One bit will be lost in the `scalbn` call, add + // another top bit to avoid double rounding if inexact. + let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2)); + i = iu.signed(); + + if neg { + i = -i; + } + + r = F::cast_from_lossy(i); + + // Remove the top bit + r = F::cast_from(2i8) * r - c; + status.set_underflow(true); + } + } else { + // Only round once when scaled + d = F::EXP_BITS as i32 - 1; + let sticky = IntTy::::from(rhi << (F::BITS as i32 - d) != zero); + i = (((rhi >> d) | sticky) << d).signed(); + + if neg { + i = -i; + } + + r = F::cast_from_lossy(i); + } + } + + // Use our exponent to scale the final value. + FpResult::new(super::generic::scalbn(r, e), status) +} + +/// Representation of `F` that has handled subnormals. +#[derive(Clone, Copy, Debug)] +struct Norm { + /// Normalized significand with one guard bit, unsigned. + m: F::Int, + /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa + /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`. + e: i32, + neg: bool, +} + +impl Norm { + /// Unbias the exponent and account for the mantissa's precision, including the guard bit. 
+ const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1; + + /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we + /// adjusted the exponent such that it exceeds this threashold. + const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS; + + fn from_float(x: F) -> Self { + let mut ix = x.to_bits(); + let mut e = x.ex() as i32; + let neg = x.is_sign_negative(); + if e == 0 { + // Normalize subnormals by multiplication + let scale_i = F::BITS - 1; + let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO); + let scaled = x * scale_f; + ix = scaled.to_bits(); + e = scaled.ex() as i32; + e = if e == 0 { + // If the exponent is still zero, the input was zero. Artifically set this value + // such that the final `e` will exceed `ZERO_INF_NAN`. + 1 << F::EXP_BITS + } else { + // Otherwise, account for the scaling we just did. + e - scale_i as i32 + }; + } + + e -= Self::EXP_UNBIAS as i32; + + // Absolute value, set the implicit bit, and shift to create a guard bit + ix &= F::SIG_MASK; + ix |= F::IMPLICIT_BIT; + ix <<= 1; + + Self { m: ix, e, neg } + } + + /// True if the value was zero, infinity, or NaN. + fn is_zero_nan_inf(self) -> bool { + self.e >= Self::ZERO_INF_NAN as i32 + } + + /// The only value we have + fn is_zero(self) -> bool { + // The only exponent that strictly exceeds this value is our sentinel value for zero. + self.e > Self::ZERO_INF_NAN as i32 + } } #[cfg(test)] mod tests { use super::*; + + /// Test the generic `fma_round` algorithm for a given float. + fn spec_test() + where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(fma(x, y, z), F::ZERO); + assert_biteq!(fma(x, -y, z), F::NEG_ZERO); + assert_biteq!(fma(-x, y, z), F::NEG_ZERO); + assert_biteq!(fma(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f32() { + spec_test::(); + } + + #[test] + fn spec_test_f64() { + spec_test::(); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(); + } + #[test] fn fma_segfault() { // These two inputs cause fma to segfault on release due to overflow: diff --git a/src/math/fma_wide.rs b/src/math/fma_wide.rs new file mode 100644 index 00000000..a8c1a548 --- /dev/null +++ b/src/math/fma_wide.rs @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ + +use super::super::support::{FpResult, IntTy, Round, Status}; +use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; + +// Placeholder so we can have `fmaf16` in the `Float` trait. 
+#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, +/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. +pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult +where + F: Float + HFloat, + B: Float + DFloat, + B::Int: CastInto, + i32: CastFrom, +{ + let one = IntTy::::ONE; + + let xy: B = x.widen() * y.widen(); + let mut result: B = xy + z.widen(); + let mut ui: B::Int = result.to_bits(); + let re = result.ex(); + let zb: B = z.widen(); + + let prec_diff = B::SIG_BITS - F::SIG_BITS; + let excess_prec = ui & ((one << prec_diff) - one); + let halfway = one << (prec_diff - 1); + + // Common case: the larger precision is fine if... + // This is not a halfway case + if excess_prec != halfway + // Or the result is NaN + || re == B::EXP_SAT + // Or the result is exact + || (result - xy == zb && result - zb == xy) + // Or the mode is something other than round to nearest + || round != Round::Nearest + { + let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; + let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; + + let mut status = Status::OK; + + if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { + // This branch is never hit; requires previous operations to set a status + status.set_inexact(false); + + result = xy + z.widen(); + if status.inexact() { + status.set_underflow(true); + } else { + status.set_inexact(true); + } + } + + return FpResult { val: result.narrow(), status }; + } + + let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; + let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; + if neg == (err < B::ZERO) { + ui += one; + } else { + ui -= one; + } + + FpResult::ok(B::from_bits(ui).narrow()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } +} diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs deleted file mode 100644 index 40d7f40d..00000000 --- a/src/math/fmaf.rs +++ /dev/null @@ -1,21 +0,0 @@ -/// Floating multiply add (f32) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation: -/// Computes the value (as if) to infinite precision and rounds once to the result format, -/// according to the rounding mode characterized by the value of FLT_ROUNDS. 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { - super::generic::fma_wide(x, y, z) -} - -#[cfg(test)] -mod tests { - #[test] - fn issue_263() { - let a = f32::from_bits(1266679807); - let b = f32::from_bits(1300234242); - let c = f32::from_bits(1115553792); - let expected = f32::from_bits(1501560833); - assert_eq!(super::fmaf(a, b, c), expected); - } -} diff --git a/src/math/fmaf128.rs b/src/math/fmaf128.rs deleted file mode 100644 index 50f7360d..00000000 --- a/src/math/fmaf128.rs +++ /dev/null @@ -1,7 +0,0 @@ -/// Fused multiply add (f128) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { - return super::generic::fma(x, y, z); -} diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs deleted file mode 100644 index cb1061cc..00000000 --- a/src/math/generic/fma.rs +++ /dev/null @@ -1,420 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */ - -use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; -use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt}; - -/// Fused multiply-add that works when there is not a larger float size available. Currently this -/// is still specialized only for `f64`. Computes `(x * y) + z`. -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: F, y: F, z: F) -> F -where - F: Float, - F: CastFrom, - F: CastFrom, - F::Int: HInt, - u32: CastInto, -{ - fma_round(x, y, z, Round::Nearest).val -} - -pub fn fma_round(x: F, y: F, z: F, _round: Round) -> FpResult -where - F: Float, - F: CastFrom, - F: CastFrom, - F::Int: HInt, - u32: CastInto, -{ - let one = IntTy::::ONE; - let zero = IntTy::::ZERO; - - // Normalize such that the top of the mantissa is zero and we have a guard bit. - let nx = Norm::from_float(x); - let ny = Norm::from_float(y); - let nz = Norm::from_float(z); - - if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() { - // Value will overflow, defer to non-fused operations. - return FpResult::ok(x * y + z); - } - - if nz.is_zero_nan_inf() { - if nz.is_zero() { - // Empty add component means we only need to multiply. - return FpResult::ok(x * y); - } - // `z` is NaN or infinity, which sets the result. - return FpResult::ok(z); - } - - // multiply: r = x * y - let zhi: F::Int; - let zlo: F::Int; - let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi(); - - // Exponent result of multiplication - let mut e: i32 = nx.e + ny.e; - // Needed shift to align `z` to the multiplication result - let mut d: i32 = nz.e - e; - let sbits = F::BITS as i32; - - // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz) - if d > 0 { - // The magnitude of `z` is larger than `x * y` - if d < sbits { - // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift - // it into `(zhi, zlo)`. No exponent adjustment necessary. - zlo = nz.m << d; - zhi = nz.m >> (sbits - d); - } else { - // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts - // as a shift by `sbits`). - zlo = zero; - zhi = nz.m; - d -= sbits; - - // `z`'s exponent is large enough that it now needs to be taken into account. - e = nz.e - sbits; - - if d == 0 { - // Exactly `sbits`, nothing to do - } else if d < sbits { - // Remaining shift fits within `sbits`. 
Leave `z` in place, shift `x * y` - rlo = (rhi << (sbits - d)) | (rlo >> d); - // Set the sticky bit - rlo |= IntTy::::from((rlo << (sbits - d)) != zero); - rhi = rhi >> d; - } else { - // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set - // the sticky bit. - rlo = one; - rhi = zero; - } - } - } else { - // `z`'s magnitude once shifted fits entirely within `zlo` - zhi = zero; - d = -d; - if d == 0 { - // No shift needed - zlo = nz.m; - } else if d < sbits { - // Shift s.t. `nz.m` fits into `zlo` - let sticky = IntTy::::from((nz.m << (sbits - d)) != zero); - zlo = (nz.m >> d) | sticky; - } else { - // Would be entirely shifted out, only set the sticky bit - zlo = one; - } - } - - /* addition */ - - let mut neg = nx.neg ^ ny.neg; - let samesign: bool = !neg ^ nz.neg; - let mut rhi_nonzero = true; - - if samesign { - // r += z - rlo = rlo.wrapping_add(zlo); - rhi += zhi + IntTy::::from(rlo < zlo); - } else { - // r -= z - let (res, borrow) = rlo.overflowing_sub(zlo); - rlo = res; - rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::::from(borrow))); - if (rhi >> (F::BITS - 1)) != zero { - rlo = rlo.signed().wrapping_neg().unsigned(); - rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::::from(rlo != zero); - neg = !neg; - } - rhi_nonzero = rhi != zero; - } - - /* Construct result */ - - // Shift result into `rhi`, left-aligned. Last bit is sticky - if rhi_nonzero { - // `d` > 0, need to shift both `rhi` and `rlo` into result - e += sbits; - d = rhi.leading_zeros() as i32 - 1; - rhi = (rhi << d) | (rlo >> (sbits - d)); - // Update sticky - rhi |= IntTy::::from((rlo << d) != zero); - } else if rlo != zero { - // `rhi` is zero, `rlo` is the entire result and needs to be shifted - d = rlo.leading_zeros() as i32 - 1; - if d < 0 { - // Shift and set sticky - rhi = (rlo >> 1) | (rlo & one); - } else { - rhi = rlo << d; - } - } else { - // exact +/- 0.0 - return FpResult::ok(x * y + z); - } - - e -= d; - - // Use int->float conversion to populate the significand. - // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1] - let mut i: F::SignedInt = rhi.signed(); - - if neg { - i = -i; - } - - // `|r|` is in `[0x1p62,0x1p63]` for `f64` - let mut r: F = F::cast_from_lossy(i); - - /* Account for subnormal and rounding */ - - // Unbiased exponent for the maximum value of `r` - let max_pow = F::BITS - 1 + F::EXP_BIAS; - - let mut status = Status::OK; - - if e < -(max_pow as i32 - 2) { - // Result is subnormal before rounding - if e == -(max_pow as i32 - 1) { - let mut c = F::from_parts(false, max_pow, zero); - if neg { - c = -c; - } - - if r == c { - // Min normal after rounding, - status.set_underflow(true); - r = F::MIN_POSITIVE_NORMAL.copysign(r); - return FpResult::new(r, status); - } - - if (rhi << (F::SIG_BITS + 1)) != zero { - // Account for truncated bits. One bit will be lost in the `scalbn` call, add - // another top bit to avoid double rounding if inexact. - let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2)); - i = iu.signed(); - - if neg { - i = -i; - } - - r = F::cast_from_lossy(i); - - // Remove the top bit - r = F::cast_from(2i8) * r - c; - status.set_underflow(true); - } - } else { - // Only round once when scaled - d = F::EXP_BITS as i32 - 1; - let sticky = IntTy::::from(rhi << (F::BITS as i32 - d) != zero); - i = (((rhi >> d) | sticky) << d).signed(); - - if neg { - i = -i; - } - - r = F::cast_from_lossy(i); - } - } - - // Use our exponent to scale the final value. 
- FpResult::new(super::scalbn(r, e), status) -} - -/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, -/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. -pub fn fma_wide(x: F, y: F, z: F) -> F -where - F: Float + HFloat, - B: Float + DFloat, - B::Int: CastInto, - i32: CastFrom, -{ - fma_wide_round(x, y, z, Round::Nearest).val -} - -pub fn fma_wide_round(x: F, y: F, z: F, round: Round) -> FpResult -where - F: Float + HFloat, - B: Float + DFloat, - B::Int: CastInto, - i32: CastFrom, -{ - let one = IntTy::::ONE; - - let xy: B = x.widen() * y.widen(); - let mut result: B = xy + z.widen(); - let mut ui: B::Int = result.to_bits(); - let re = result.ex(); - let zb: B = z.widen(); - - let prec_diff = B::SIG_BITS - F::SIG_BITS; - let excess_prec = ui & ((one << prec_diff) - one); - let halfway = one << (prec_diff - 1); - - // Common case: the larger precision is fine if... - // This is not a halfway case - if excess_prec != halfway - // Or the result is NaN - || re == B::EXP_SAT - // Or the result is exact - || (result - xy == zb && result - zb == xy) - // Or the mode is something other than round to nearest - || round != Round::Nearest - { - let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; - let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; - - let mut status = Status::OK; - - if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { - // This branch is never hit; requires previous operations to set a status - status.set_inexact(false); - - result = xy + z.widen(); - if status.inexact() { - status.set_underflow(true); - } else { - status.set_inexact(true); - } - } - - return FpResult { val: result.narrow(), status }; - } - - let neg = ui >> (B::BITS - 1) != IntTy::::ZERO; - let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy }; - if neg == (err < B::ZERO) { - ui += one; - } else { - ui -= one; - } - - FpResult::ok(B::from_bits(ui).narrow()) -} - -/// Representation of `F` that has handled subnormals. -#[derive(Clone, Copy, Debug)] -struct Norm { - /// Normalized significand with one guard bit, unsigned. - m: F::Int, - /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa - /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`. - e: i32, - neg: bool, -} - -impl Norm { - /// Unbias the exponent and account for the mantissa's precision, including the guard bit. - const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1; - - /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we - /// adjusted the exponent such that it exceeds this threashold. - const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS; - - fn from_float(x: F) -> Self { - let mut ix = x.to_bits(); - let mut e = x.ex() as i32; - let neg = x.is_sign_negative(); - if e == 0 { - // Normalize subnormals by multiplication - let scale_i = F::BITS - 1; - let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO); - let scaled = x * scale_f; - ix = scaled.to_bits(); - e = scaled.ex() as i32; - e = if e == 0 { - // If the exponent is still zero, the input was zero. Artifically set this value - // such that the final `e` will exceed `ZERO_INF_NAN`. - 1 << F::EXP_BITS - } else { - // Otherwise, account for the scaling we just did. 
- e - scale_i as i32 - }; - } - - e -= Self::EXP_UNBIAS as i32; - - // Absolute value, set the implicit bit, and shift to create a guard bit - ix &= F::SIG_MASK; - ix |= F::IMPLICIT_BIT; - ix <<= 1; - - Self { m: ix, e, neg } - } - - /// True if the value was zero, infinity, or NaN. - fn is_zero_nan_inf(self) -> bool { - self.e >= Self::ZERO_INF_NAN as i32 - } - - /// The only value we have - fn is_zero(self) -> bool { - // The only exponent that strictly exceeds this value is our sentinel value for zero. - self.e > Self::ZERO_INF_NAN as i32 - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn spec_test() - where - F: Float, - F: CastFrom, - F: CastFrom, - F::Int: HInt, - u32: CastInto, - { - let x = F::from_bits(F::Int::ONE); - let y = F::from_bits(F::Int::ONE); - let z = F::ZERO; - - // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of - // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the - // exact result" - assert_biteq!(fma(x, y, z), F::ZERO); - assert_biteq!(fma(x, -y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, -y, z), F::ZERO); - } - - #[test] - fn spec_test_f64() { - spec_test::(); - - let expect_underflow = [ - ( - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("0x1.ffffffffffffp-1023"), - hf64!("0x0.ffffffffffff8p-1022"), - ), - ( - // FIXME: we raise underflow but this should only be inexact (based on C and - // `rustc_apfloat`). - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("-0x1.0p-1022"), - hf64!("-0x1.0p-1022"), - ), - ]; - - for (x, y, z, res) in expect_underflow { - let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); - assert_biteq!(val, res); - assert_eq!(status, Status::UNDERFLOW); - } - } - - #[test] - #[cfg(f128_enabled)] - fn spec_test_f128() { - spec_test::(); - } -} diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs index f224eba7..9be185f8 100644 --- a/src/math/generic/mod.rs +++ b/src/math/generic/mod.rs @@ -3,7 +3,6 @@ mod copysign; mod fabs; mod fdim; mod floor; -mod fma; mod fmax; mod fmaximum; mod fmaximum_num; @@ -22,7 +21,6 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; -pub use fma::{fma, fma_wide}; pub use fmax::fmax; pub use fmaximum::fmaximum; pub use fmaximum_num::fmaximum_num; diff --git a/src/math/mod.rs b/src/math/mod.rs index e58d79ad..5fc8fa0b 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -164,7 +164,7 @@ mod fdimf; mod floor; mod floorf; mod fma; -mod fmaf; +mod fma_wide; mod fmin_fmax; mod fminimum_fmaximum; mod fminimum_fmaximum_num; @@ -271,7 +271,7 @@ pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; -pub use self::fmaf::fmaf; +pub use self::fma_wide::fmaf; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; @@ -370,6 +370,9 @@ cfg_if! { pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; // verify-sorted-end + + #[allow(unused_imports)] + pub(crate) use self::fma_wide::fmaf16; } } @@ -381,7 +384,6 @@ cfg_if! { mod fabsf128; mod fdimf128; mod floorf128; - mod fmaf128; mod fmodf128; mod ldexpf128; mod roundf128; @@ -396,7 +398,7 @@ cfg_if! 
{ pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; pub use self::floorf128::floorf128; - pub use self::fmaf128::fmaf128; + pub use self::fma::fmaf128; pub use self::fmin_fmax::{fmaxf128, fminf128}; pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128}; pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128}; diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs index 534ca9a0..96c209c8 100644 --- a/src/math/support/float_traits.rs +++ b/src/math/support/float_traits.rs @@ -160,9 +160,11 @@ pub trait Float: fn abs(self) -> Self; /// Returns a number composed of the magnitude of self and the sign of sign. - #[allow(dead_code)] fn copysign(self, other: Self) -> Self; + /// Fused multiply add, rounding once. + fn fma(self, y: Self, z: Self) -> Self; + /// Returns (normalized exponent, normalized significand) #[allow(dead_code)] fn normalize(significand: Self::Int) -> (i32, Self::Int); @@ -184,7 +186,9 @@ macro_rules! float_impl { $sity:ident, $bits:expr, $significand_bits:expr, - $from_bits:path + $from_bits:path, + $fma_fn:ident, + $fma_intrinsic:ident ) => { impl Float for $ty { type Int = $ity; @@ -252,6 +256,16 @@ macro_rules! float_impl { } } } + fn fma(self, y: Self, z: Self) -> Self { + cfg_if! { + // fma is not yet available in `core` + if #[cfg(intrinsics_enabled)] { + unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) } + } else { + super::super::$fma_fn(self, y, z) + } + } + } fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS); (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int) @@ -261,11 +275,11 @@ macro_rules! float_impl { } #[cfg(f16_enabled)] -float_impl!(f16, u16, i16, 16, 10, f16::from_bits); -float_impl!(f32, u32, i32, 32, 23, f32_from_bits); -float_impl!(f64, u64, i64, 64, 52, f64_from_bits); +float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16); +float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32); +float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64); #[cfg(f128_enabled)] -float_impl!(f128, u128, i128, 128, 112, f128::from_bits); +float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128); /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
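Illustration (a minimal sketch, not part of the patch): the refactor keeps the public `fma`, `fmaf`, and `fmaf128` entry points intact while rerouting them internally (`f64`/`f128` through `fma_round`, `f32` through the widening `fma_wide_round`), and `cbrt_round` now reaches the same code through the new `Float::fma` trait method for its exact two-product steps. The snippet below assumes the crate is consumed as a dependency under its published name `libm` and only exercises the public API.

fn main() {
    // Public entry points behave as before; only the internal routing changed.
    assert_eq!(libm::fma(2.0_f64, 3.0, 1.0), 7.0);
    assert_eq!(libm::fmaf(2.0_f32, 3.0, 1.0), 7.0);

    // The two-product step used in `cbrt_round`: an FMA with the negated
    // rounded product recovers the rounding error of `y * y`, so the pair
    // `y2 + y2l` represents `y^2` exactly (in double-double form).
    let y: f64 = 1.0 / 3.0;
    let y2 = y * y;                 // rounded high part
    let y2l = libm::fma(y, y, -y2); // exact low part
    assert!(y2l != 0.0 && y2l.abs() < f64::EPSILON);
}

Routing `f32` through the widening path keeps its hot case to a single `f64` multiply-add plus a halfway-case correction, while `f64` and `f128`, which have no hardware-backed wider type to lean on, share the generic integer-based `fma_round` algorithm.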