diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs
index ca76e14cb8ab..279ad434a817 100644
--- a/datafusion/common/src/stats.rs
+++ b/datafusion/common/src/stats.rs
@@ -17,11 +17,194 @@
 
 //! This module provides data structures to represent statistics
 
-use std::fmt::Display;
-
+use crate::ScalarValue;
 use arrow::datatypes::DataType;
 
-use crate::ScalarValue;
+use std::fmt::{self, Debug, Display};
+
+/// Represents a value with a degree of certainty. `Precision` is used to
+/// propagate information about the precision of statistical values.
+#[derive(Clone, PartialEq, Eq, Default)]
+pub enum Precision<T: Debug + Clone + PartialEq + Eq + PartialOrd> {
+    /// The exact value is known
+    Exact(T),
+    /// The value is not known exactly, but is likely close to this value
+    Inexact(T),
+    /// Nothing is known about the value
+    #[default]
+    Absent,
+}
+
+impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Precision<T> {
+    /// If we have some value (exact or inexact), it returns that value.
+    /// Otherwise, it returns `None`.
+    pub fn get_value(&self) -> Option<&T> {
+        match self {
+            Precision::Exact(value) | Precision::Inexact(value) => Some(value),
+            Precision::Absent => None,
+        }
+    }
+
+    /// Transform the value in this [`Precision`] object, if one exists, using
+    /// the given function. Preserves the exactness state.
+    ///
+    /// The function is called at most once, so any `FnOnce` is accepted.
+    pub fn map<F>(self, f: F) -> Precision<T>
+    where
+        F: FnOnce(T) -> T,
+    {
+        match self {
+            Precision::Exact(val) => Precision::Exact(f(val)),
+            Precision::Inexact(val) => Precision::Inexact(f(val)),
+            Precision::Absent => Precision::Absent,
+        }
+    }
+
+    /// Returns `Some(true)` if we have an exact value, `Some(false)` if we
+    /// have an inexact value, and `None` if there is no value.
+    pub fn is_exact(&self) -> Option<bool> {
+        match self {
+            Precision::Exact(_) => Some(true),
+            Precision::Inexact(_) => Some(false),
+            Precision::Absent => None,
+        }
+    }
+
+    /// Returns the maximum of two (possibly inexact) values, conservatively
+    /// propagating exactness information. If one of the input values is
+    /// [`Precision::Absent`], the result is `Absent` too.
+    pub fn max(&self, other: &Precision<T>) -> Precision<T> {
+        match (self, other) {
+            (Precision::Exact(a), Precision::Exact(b)) => {
+                Precision::Exact(if a >= b { a.clone() } else { b.clone() })
+            }
+            (Precision::Inexact(a), Precision::Exact(b))
+            | (Precision::Exact(a), Precision::Inexact(b))
+            | (Precision::Inexact(a), Precision::Inexact(b)) => {
+                Precision::Inexact(if a >= b { a.clone() } else { b.clone() })
+            }
+            (_, _) => Precision::Absent,
+        }
+    }
+
+    /// Returns the minimum of two (possibly inexact) values, conservatively
+    /// propagating exactness information. If one of the input values is
+    /// [`Precision::Absent`], the result is `Absent` too.
+    pub fn min(&self, other: &Precision<T>) -> Precision<T> {
+        match (self, other) {
+            (Precision::Exact(a), Precision::Exact(b)) => {
+                Precision::Exact(if a >= b { b.clone() } else { a.clone() })
+            }
+            (Precision::Inexact(a), Precision::Exact(b))
+            | (Precision::Exact(a), Precision::Inexact(b))
+            | (Precision::Inexact(a), Precision::Inexact(b)) => {
+                Precision::Inexact(if a >= b { b.clone() } else { a.clone() })
+            }
+            (_, _) => Precision::Absent,
+        }
+    }
+
+    /// Demotes the precision state from exact to inexact (if present).
+    pub fn to_inexact(self) -> Self {
+        match self {
+            Precision::Exact(value) => Precision::Inexact(value),
+            _ => self,
+        }
+    }
+}
+
+impl Precision<usize> {
+    /// Applies the checked operation `op` to two (possibly inexact) [`usize`]
+    /// values, conservatively propagating exactness information. The result
+    /// is `Absent` if either input is [`Precision::Absent`], or if `op` returns
+    /// `None` because the outcome cannot be represented as a `usize`.
+    fn checked_combine(
+        &self,
+        other: &Precision<usize>,
+        op: impl FnOnce(usize, usize) -> Option<usize>,
+    ) -> Precision<usize> {
+        match (self, other) {
+            (Precision::Exact(a), Precision::Exact(b)) => {
+                op(*a, *b).map_or(Precision::Absent, Precision::Exact)
+            }
+            (Precision::Inexact(a), Precision::Exact(b))
+            | (Precision::Exact(a), Precision::Inexact(b))
+            | (Precision::Inexact(a), Precision::Inexact(b)) => {
+                op(*a, *b).map_or(Precision::Absent, Precision::Inexact)
+            }
+            (_, _) => Precision::Absent,
+        }
+    }
+
+    /// Calculates the sum of two (possibly inexact) [`usize`] values,
+    /// conservatively propagating exactness information. If one of the input
+    /// values is [`Precision::Absent`], or the sum overflows `usize`, the
+    /// result is `Absent` too.
+    pub fn add(&self, other: &Precision<usize>) -> Precision<usize> {
+        self.checked_combine(other, usize::checked_add)
+    }
+
+    /// Calculates the difference of two (possibly inexact) [`usize`] values,
+    /// conservatively propagating exactness information. If one of the input
+    /// values is [`Precision::Absent`], or `other` exceeds `self` so that the
+    /// difference would be negative, the result is `Absent` too.
+    pub fn sub(&self, other: &Precision<usize>) -> Precision<usize> {
+        self.checked_combine(other, usize::checked_sub)
+    }
+
+    /// Calculates the multiplication of two (possibly inexact) [`usize`] values,
+    /// conservatively propagating exactness information. If one of the input
+    /// values is [`Precision::Absent`], or the product overflows `usize`, the
+    /// result is `Absent` too.
+    pub fn multiply(&self, other: &Precision<usize>) -> Precision<usize> {
+        self.checked_combine(other, usize::checked_mul)
+    }
+}
+
+impl Precision<ScalarValue> {
+    /// Calculates the sum of two (possibly inexact) [`ScalarValue`] values,
+    /// conservatively propagating exactness information. If one of the input
+    /// values is [`Precision::Absent`], or the underlying addition returns an
+    /// error, the result is `Absent` too.
+    pub fn add(&self, other: &Precision<ScalarValue>) -> Precision<ScalarValue> {
+        match (self, other) {
+            (Precision::Exact(a), Precision::Exact(b)) => {
+                if let Ok(result) = a.add(b) {
+                    Precision::Exact(result)
+                } else {
+                    Precision::Absent
+                }
+            }
+            (Precision::Inexact(a), Precision::Exact(b))
+            | (Precision::Exact(a), Precision::Inexact(b))
+            | (Precision::Inexact(a), Precision::Inexact(b)) => {
+                if let Ok(result) = a.add(b) {
+                    Precision::Inexact(result)
+                } else {
+                    Precision::Absent
+                }
+            }
+            (_, _) => Precision::Absent,
+        }
+    }
+}
+
+impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Debug for Precision<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Precision::Exact(inner) => write!(f, "Exact({:?})", inner),
+            Precision::Inexact(inner) => write!(f, "Inexact({:?})", inner),
+            Precision::Absent => write!(f, "Absent"),
+        }
+    }
+}
+
+impl<T: Debug + Clone + PartialEq + Eq + PartialOrd> Display for Precision<T> {
+    /// Renders the same text as the [`Debug`] implementation.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        Debug::fmt(self, f)
+    }
+}
 
 /// Statistics for a relation
 /// Fields are optional and can be inexact because the sources
@@ -94,3 +277,130 @@ impl ColumnStatistics {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_get_value() {
+        let exact_precision = Precision::Exact(42);
+        let inexact_precision = Precision::Inexact(23);
+        let absent_precision = Precision::<i32>::Absent;
+
+        assert_eq!(*exact_precision.get_value().unwrap(), 42);
+        assert_eq!(*inexact_precision.get_value().unwrap(), 23);
+        assert_eq!(absent_precision.get_value(), None);
+    }
+
+    #[test]
+    fn test_map() {
+        let exact_precision = Precision::Exact(42);
+        let inexact_precision = Precision::Inexact(23);
+        let absent_precision = Precision::Absent;
+
+        let squared = |x| x * x;
+
+        assert_eq!(exact_precision.map(squared), Precision::Exact(1764));
+        assert_eq!(inexact_precision.map(squared), Precision::Inexact(529));
+        assert_eq!(absent_precision.map(squared), Precision::Absent);
+    }
+
+    #[test]
+    fn test_is_exact() {
+        let exact_precision = Precision::Exact(42);
+        let inexact_precision = Precision::Inexact(23);
+        let absent_precision = Precision::<i32>::Absent;
+
+        assert_eq!(exact_precision.is_exact(), Some(true));
+        assert_eq!(inexact_precision.is_exact(), Some(false));
+        assert_eq!(absent_precision.is_exact(), None);
+    }
+
+    #[test]
+    fn test_max() {
+        let precision1 = Precision::Exact(42);
+        let precision2 = Precision::Inexact(23);
+        let precision3 = Precision::Exact(30);
+        let absent_precision = Precision::Absent;
+
+        assert_eq!(precision1.max(&precision2), Precision::Inexact(42));
+        assert_eq!(precision1.max(&precision3), Precision::Exact(42));
+        assert_eq!(precision2.max(&precision3), Precision::Inexact(30));
+        assert_eq!(precision1.max(&absent_precision), Precision::Absent);
+    }
+
+    #[test]
+    fn test_min() {
+        let precision1 = Precision::Exact(42);
+        let precision2 = Precision::Inexact(23);
+        let precision3 = Precision::Exact(30);
+        let absent_precision = Precision::Absent;
+
+        assert_eq!(precision1.min(&precision2), Precision::Inexact(23));
+        assert_eq!(precision1.min(&precision3), Precision::Exact(30));
+        assert_eq!(precision2.min(&precision3), Precision::Inexact(23));
+        assert_eq!(precision1.min(&absent_precision), Precision::Absent);
+    }
+
+    #[test]
+    fn test_to_inexact() {
+        let exact_precision = Precision::Exact(42);
+        let inexact_precision = Precision::Inexact(42);
+        let absent_precision = Precision::<i32>::Absent;
+
+        assert_eq!(exact_precision.clone().to_inexact(), inexact_precision);
+        assert_eq!(inexact_precision.clone().to_inexact(), inexact_precision);
+        assert_eq!(absent_precision.clone().to_inexact(), absent_precision);
+    }
+
+    #[test]
+    fn test_add() {
+        let precision1 = Precision::Exact(42);
+        let precision2 = Precision::Inexact(23);
+        let precision3 = Precision::Exact(30);
+        let absent_precision = Precision::Absent;
+
+        assert_eq!(precision1.add(&precision2), Precision::Inexact(65));
+        assert_eq!(precision1.add(&precision3), Precision::Exact(72));
+        assert_eq!(precision2.add(&precision3), Precision::Inexact(53));
+        assert_eq!(precision1.add(&absent_precision), Precision::Absent);
+    }
+
+    #[test]
+    fn test_sub() {
+        let precision1 = Precision::Exact(42);
+        let precision2 = Precision::Inexact(23);
+        let precision3 = Precision::Exact(30);
+        let absent_precision = Precision::Absent;
+
+        assert_eq!(precision1.sub(&precision2), Precision::Inexact(19));
+        assert_eq!(precision1.sub(&precision3), Precision::Exact(12));
+        // 23 - 30 is not representable as `usize`, so nothing can be reported.
+        assert_eq!(precision2.sub(&precision3), Precision::Absent);
+        assert_eq!(precision1.sub(&absent_precision), Precision::Absent);
+    }
+
+    #[test]
+    fn test_multiply() {
+        let precision1 = Precision::Exact(6);
+        let precision2 = Precision::Inexact(3);
+        let precision3 = Precision::Exact(5);
+        let absent_precision = Precision::Absent;
+
+        assert_eq!(precision1.multiply(&precision2), Precision::Inexact(18));
+        assert_eq!(precision1.multiply(&precision3), Precision::Exact(30));
+        assert_eq!(precision2.multiply(&precision3), Precision::Inexact(15));
+        assert_eq!(precision1.multiply(&absent_precision), Precision::Absent);
+    }
+
+    #[test]
+    fn test_unrepresentable_results() {
+        let max_precision = Precision::Exact(usize::MAX);
+        let two = Precision::Inexact(2_usize);
+
+        assert_eq!(max_precision.add(&two), Precision::Absent);
+        assert_eq!(max_precision.multiply(&two), Precision::Absent);
+        assert_eq!(two.sub(&max_precision), Precision::Absent);
+    }
+}