From 42322f836c1cb6db97e9bfcc1ea62bd48b0d0470 Mon Sep 17 00:00:00 2001 From: getChan <9511chn@gmail.com> Date: Wed, 10 Apr 2024 19:43:16 +0900 Subject: [PATCH] Add coerce_types flag to parquet WriterProperties (#1938) --- parquet/src/file/properties.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index 287e73c9906a..57cae3fb18ba 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -53,6 +53,8 @@ pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05; pub const DEFAULT_BLOOM_FILTER_NDV: u64 = 1_000_000_u64; /// Default values for [`WriterProperties::statistics_truncate_length`] pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option<usize> = None; +/// Default values for [`WriterProperties::coerce_types`] +pub const DEFAULT_COERCE_TYPES: bool = false; /// Parquet writer version. /// @@ -139,6 +141,7 @@ pub struct WriterProperties { sorting_columns: Option<Vec<SortingColumn>>, column_index_truncate_length: Option<usize>, statistics_truncate_length: Option<usize>, + coerce_types: bool, } impl Default for WriterProperties { @@ -251,6 +254,13 @@ impl WriterProperties { self.statistics_truncate_length } + /// Returns `coerce_types` boolean + /// + /// `true` if type coercion is enabled. + pub fn coerce_types(&self) -> bool { + self.coerce_types + } + /// Returns encoding for a data page, when dictionary encoding is enabled. /// This is not configurable. 
#[inline] @@ -345,6 +355,7 @@ pub struct WriterPropertiesBuilder { sorting_columns: Option<Vec<SortingColumn>>, column_index_truncate_length: Option<usize>, statistics_truncate_length: Option<usize>, + coerce_types: bool, } impl WriterPropertiesBuilder { @@ -364,6 +375,7 @@ impl WriterPropertiesBuilder { sorting_columns: None, column_index_truncate_length: DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH, statistics_truncate_length: DEFAULT_STATISTICS_TRUNCATE_LENGTH, + coerce_types: DEFAULT_COERCE_TYPES, } } @@ -383,6 +395,7 @@ impl WriterPropertiesBuilder { sorting_columns: self.sorting_columns, column_index_truncate_length: self.column_index_truncate_length, statistics_truncate_length: self.statistics_truncate_length, + coerce_types: self.coerce_types, } } @@ -667,6 +680,13 @@ impl WriterPropertiesBuilder { self.statistics_truncate_length = max_length; self } + + /// Sets flag to enable/disable type coercion. + /// Takes precedence over globally defined settings. + pub fn set_coerce_types(mut self, coerce_types: bool) -> Self { + self.coerce_types = coerce_types; + self + } } /// Controls the level of statistics to be computed by the writer