-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Encapsulates logic from TypeSignatureClass, rename methods #4
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,8 @@ use std::num::NonZeroUsize; | |
|
||
use crate::type_coercion::aggregates::NUMERICS; | ||
use arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; | ||
use datafusion_common::types::{LogicalTypeRef, NativeType}; | ||
use datafusion_common::internal_err; | ||
use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType}; | ||
use indexmap::IndexSet; | ||
use itertools::Itertools; | ||
|
||
|
@@ -225,6 +226,89 @@ impl Display for TypeSignatureClass { | |
} | ||
} | ||
|
||
impl TypeSignatureClass { | ||
/// Get example acceptable types for this `TypeSignatureClass` | ||
/// | ||
/// This is used for `information_schema` and can be used to generate | ||
/// documentation or error messages. | ||
fn get_example_types(&self) -> Vec<DataType> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved logic that was related to TypeSignatureClass into methods |
||
match self { | ||
TypeSignatureClass::Native(l) => get_data_types(l.native()), | ||
TypeSignatureClass::Timestamp => { | ||
vec![ | ||
DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
DataType::Timestamp( | ||
TimeUnit::Nanosecond, | ||
Some(TIMEZONE_WILDCARD.into()), | ||
), | ||
] | ||
} | ||
TypeSignatureClass::Time => { | ||
vec![DataType::Time64(TimeUnit::Nanosecond)] | ||
} | ||
TypeSignatureClass::Interval => { | ||
vec![DataType::Interval(IntervalUnit::DayTime)] | ||
} | ||
TypeSignatureClass::Duration => { | ||
vec![DataType::Duration(TimeUnit::Nanosecond)] | ||
} | ||
TypeSignatureClass::Integer => { | ||
vec![DataType::Int64] | ||
} | ||
} | ||
} | ||
|
||
/// Does the specified `NativeType` match this type signature class? | ||
pub fn matches_native_type( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to make this private There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually we can't make this private, that is why I didn't move it as a method, to keep us easy to change the logic There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems to me like the exact details of what types are coerced to what other types is actually part of the DataFusion public API because those rules effect what queries will run Whenever we make changes to the coercsion rules, it seems to result in a bunch of issues reported by users (because some queries that used to work no longer do). Thus I propose we move towards a design that is both more explicit (and extensible) I realize we could probably achieve this goal without making more There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's try it |
||
self: &TypeSignatureClass, | ||
logical_type: &NativeType, | ||
) -> bool { | ||
if logical_type == &NativeType::Null { | ||
return true; | ||
} | ||
|
||
match self { | ||
TypeSignatureClass::Native(t) if t.native() == logical_type => true, | ||
TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true, | ||
TypeSignatureClass::Time if logical_type.is_time() => true, | ||
TypeSignatureClass::Interval if logical_type.is_interval() => true, | ||
TypeSignatureClass::Duration if logical_type.is_duration() => true, | ||
TypeSignatureClass::Integer if logical_type.is_integer() => true, | ||
_ => false, | ||
} | ||
} | ||
|
||
/// What type would `origin_type` be casted to when casting to the specified native type? | ||
pub fn default_casted_type( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to make this private There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above, I don't fully understand the desire to keep it non |
||
&self, | ||
native_type: &NativeType, | ||
origin_type: &DataType, | ||
) -> datafusion_common::Result<DataType> { | ||
match self { | ||
TypeSignatureClass::Native(logical_type) => { | ||
logical_type.native().default_cast_for(origin_type) | ||
} | ||
// If the given type is already a timestamp, we don't change the unit and timezone | ||
TypeSignatureClass::Timestamp if native_type.is_timestamp() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Time if native_type.is_time() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Interval if native_type.is_interval() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Duration if native_type.is_duration() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Integer if native_type.is_integer() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
_ => internal_err!("May miss the matching logic in `matches_native_type`"), | ||
} | ||
} | ||
} | ||
|
||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] | ||
pub enum ArrayFunctionSignature { | ||
/// Specialized Signature for ArrayAppend and similar functions | ||
|
@@ -365,18 +449,23 @@ impl TypeSignature { | |
} | ||
} | ||
|
||
/// This function is used specifically internally for `information_schema` | ||
/// We suggest not to rely on this function | ||
/// | ||
/// Get all possible types for `information_schema` from the given `TypeSignature` | ||
// | ||
// TODO: Make this function private | ||
#[deprecated(since = "0.46.0", note = "See get_example_types instead")] | ||
pub fn get_possible_types(&self) -> Vec<Vec<DataType>> { | ||
self.get_example_types() | ||
} | ||
|
||
/// Return example acceptable types for this `TypeSignature`' | ||
/// | ||
/// Returns a Vec<DataType> for each argument to the function | ||
/// | ||
/// This is used for `information_schema` and can be used to generate | ||
/// documentation or error messages. | ||
pub fn get_example_types(&self) -> Vec<Vec<DataType>> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make this private, it is not useful to make this public. I'm also thinking about other types than DataType |
||
match self { | ||
TypeSignature::Exact(types) => vec![types.clone()], | ||
TypeSignature::OneOf(types) => types | ||
.iter() | ||
.flat_map(|type_sig| type_sig.get_possible_types()) | ||
.flat_map(|type_sig| type_sig.get_example_types()) | ||
.collect(), | ||
TypeSignature::Uniform(arg_count, types) => types | ||
.iter() | ||
|
@@ -387,15 +476,13 @@ impl TypeSignature { | |
.iter() | ||
.map(|c| { | ||
let mut all_types: IndexSet<DataType> = | ||
get_possible_types_from_signature_classes(&c.desired_type) | ||
.into_iter() | ||
.collect(); | ||
c.desired_type.get_example_types().into_iter().collect(); | ||
|
||
if let Some(implicit_coercion) = &c.implicit_coercion { | ||
let allowed_casts: Vec<DataType> = implicit_coercion | ||
.allowed_source_types | ||
.iter() | ||
.flat_map(get_possible_types_from_signature_classes) | ||
.flat_map(|t| t.get_example_types()) | ||
.collect(); | ||
all_types.extend(allowed_casts); | ||
} | ||
|
@@ -429,32 +516,6 @@ impl TypeSignature { | |
} | ||
} | ||
|
||
fn get_possible_types_from_signature_classes( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. moved/renamed |
||
signature_classes: &TypeSignatureClass, | ||
) -> Vec<DataType> { | ||
match signature_classes { | ||
TypeSignatureClass::Native(l) => get_data_types(l.native()), | ||
TypeSignatureClass::Timestamp => { | ||
vec![ | ||
DataType::Timestamp(TimeUnit::Nanosecond, None), | ||
DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())), | ||
] | ||
} | ||
TypeSignatureClass::Time => { | ||
vec![DataType::Time64(TimeUnit::Nanosecond)] | ||
} | ||
TypeSignatureClass::Interval => { | ||
vec![DataType::Interval(IntervalUnit::DayTime)] | ||
} | ||
TypeSignatureClass::Duration => { | ||
vec![DataType::Duration(TimeUnit::Nanosecond)] | ||
} | ||
TypeSignatureClass::Integer => { | ||
vec![DataType::Int64] | ||
} | ||
} | ||
} | ||
|
||
fn get_data_types(native_type: &NativeType) -> Vec<DataType> { | ||
match native_type { | ||
NativeType::Null => vec![DataType::Null], | ||
|
@@ -822,14 +883,14 @@ mod tests { | |
#[test] | ||
fn test_get_possible_types() { | ||
let type_signature = TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!(possible_types, vec![vec![DataType::Int32, DataType::Int64]]); | ||
|
||
let type_signature = TypeSignature::OneOf(vec![ | ||
TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]), | ||
TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]), | ||
]); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![ | ||
|
@@ -843,7 +904,7 @@ mod tests { | |
TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]), | ||
TypeSignature::Exact(vec![DataType::Utf8]), | ||
]); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![ | ||
|
@@ -855,7 +916,7 @@ mod tests { | |
|
||
let type_signature = | ||
TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![ | ||
|
@@ -868,7 +929,7 @@ mod tests { | |
Coercion::new(TypeSignatureClass::Native(logical_string())), | ||
Coercion::new(TypeSignatureClass::Native(logical_int64())), | ||
]); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![ | ||
|
@@ -880,14 +941,14 @@ mod tests { | |
|
||
let type_signature = | ||
TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![vec![DataType::Int32], vec![DataType::Int64]] | ||
); | ||
|
||
let type_signature = TypeSignature::Numeric(2); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![ | ||
|
@@ -905,7 +966,7 @@ mod tests { | |
); | ||
|
||
let type_signature = TypeSignature::String(2); | ||
let possible_types = type_signature.get_possible_types(); | ||
let possible_types = type_signature.get_example_types(); | ||
assert_eq!( | ||
possible_types, | ||
vec![ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,10 +27,7 @@ use datafusion_common::{ | |
}; | ||
use datafusion_common::{types::LogicalType, utils::coerced_fixed_size_list_to_list}; | ||
use datafusion_expr_common::{ | ||
signature::{ | ||
ArrayFunctionSignature, TypeSignatureClass, FIXED_SIZE_LIST_WILDCARD, | ||
TIMEZONE_WILDCARD, | ||
}, | ||
signature::{ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD}, | ||
type_coercion::binary::comparison_coercion_numeric, | ||
type_coercion::binary::string_coercion, | ||
}; | ||
|
@@ -599,76 +596,19 @@ fn get_valid_types( | |
|
||
let mut new_types = Vec::with_capacity(current_types.len()); | ||
for (current_type, param) in current_types.iter().zip(param_types.iter()) { | ||
let current_logical_type: NativeType = current_type.into(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. moved these into actual methods |
||
|
||
fn is_matched_type( | ||
target_type: &TypeSignatureClass, | ||
logical_type: &NativeType, | ||
) -> bool { | ||
if logical_type == &NativeType::Null { | ||
return true; | ||
} | ||
|
||
match target_type { | ||
TypeSignatureClass::Native(t) if t.native() == logical_type => { | ||
true | ||
} | ||
TypeSignatureClass::Timestamp if logical_type.is_timestamp() => { | ||
true | ||
} | ||
TypeSignatureClass::Time if logical_type.is_time() => true, | ||
TypeSignatureClass::Interval if logical_type.is_interval() => { | ||
true | ||
} | ||
TypeSignatureClass::Duration if logical_type.is_duration() => { | ||
true | ||
} | ||
TypeSignatureClass::Integer if logical_type.is_integer() => true, | ||
_ => false, | ||
} | ||
} | ||
|
||
fn default_casted_type( | ||
signature_class: &TypeSignatureClass, | ||
logical_type: &NativeType, | ||
origin_type: &DataType, | ||
) -> Result<DataType> { | ||
match signature_class { | ||
TypeSignatureClass::Native(logical_type) => { | ||
logical_type.native().default_cast_for(origin_type) | ||
} | ||
// If the given type is already a timestamp, we don't change the unit and timezone | ||
TypeSignatureClass::Timestamp if logical_type.is_timestamp() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Time if logical_type.is_time() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Interval if logical_type.is_interval() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Duration if logical_type.is_duration() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
TypeSignatureClass::Integer if logical_type.is_integer() => { | ||
Ok(origin_type.to_owned()) | ||
} | ||
_ => internal_err!("May miss the matching logic in `is_matched_type`"), | ||
} | ||
} | ||
let current_native_type: NativeType = current_type.into(); | ||
|
||
if is_matched_type(¶m.desired_type, ¤t_logical_type) { | ||
let casted_type = default_casted_type( | ||
¶m.desired_type, | ||
¤t_logical_type, | ||
if param.desired_type.matches_native_type(¤t_native_type) { | ||
let casted_type = param.desired_type.default_casted_type( | ||
¤t_native_type, | ||
current_type, | ||
)?; | ||
|
||
new_types.push(casted_type); | ||
} else if param | ||
.allowed_source_types() | ||
.iter() | ||
.any(|t| is_matched_type(t, ¤t_logical_type)) { | ||
.any(|t| t.matches_native_type(¤t_native_type)) { | ||
// If the condition is met which means `implicit coercion`` is provided so we can safely unwrap | ||
let default_casted_type = param.default_casted_type().unwrap(); | ||
let casted_type = default_casted_type.default_cast_for(current_type)?; | ||
|
@@ -677,7 +617,7 @@ fn get_valid_types( | |
return internal_err!( | ||
"Expect {} but received {}, DataType: {}", | ||
param.desired_type, | ||
current_logical_type, | ||
current_native_type, | ||
current_type | ||
); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
renamed and deprecated
get_possible_types
to reflect what the function actually does