From 833d55df8ad85e2afdde426e83302e2a4a365230 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Fri, 31 May 2024 17:46:21 +0800 Subject: [PATCH] docs: add documents to substrait type variation consts (#10719) * docs: add documents to substrait type variation consts Signed-off-by: Ruihang Xia * rename and add todo Signed-off-by: Ruihang Xia * fix link style Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- .../substrait/src/logical_plan/consumer.rs | 162 +++++++------- .../substrait/src/logical_plan/producer.rs | 208 ++++++++++-------- datafusion/substrait/src/variation_const.rs | 39 ++-- 3 files changed, 229 insertions(+), 180 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index eb819e2c87dfc..597f34e89a02a 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -73,11 +73,14 @@ use std::str::FromStr; use std::sync::Arc; use crate::variation_const::{ - DATE_32_TYPE_REF, DATE_64_TYPE_REF, DECIMAL_128_TYPE_REF, DECIMAL_256_TYPE_REF, - DEFAULT_CONTAINER_TYPE_REF, DEFAULT_TYPE_REF, INTERVAL_DAY_TIME_TYPE_REF, - INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, - LARGE_CONTAINER_TYPE_REF, TIMESTAMP_MICRO_TYPE_REF, TIMESTAMP_MILLI_TYPE_REF, - TIMESTAMP_NANO_TYPE_REF, TIMESTAMP_SECOND_TYPE_REF, UNSIGNED_INTEGER_TYPE_REF, + DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, + DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, + DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF, + INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, + TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, + TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, + UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; enum ScalarFunctionType { @@ -1130,29 +1133,29 @@ fn from_substrait_type( 
Some(s_kind) => match s_kind { r#type::Kind::Bool(_) => Ok(DataType::Boolean), r#type::Kind::I8(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(DataType::Int8), - UNSIGNED_INTEGER_TYPE_REF => Ok(DataType::UInt8), + DEFAULT_TYPE_VARIATION_REF => Ok(DataType::Int8), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(DataType::UInt8), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, r#type::Kind::I16(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(DataType::Int16), - UNSIGNED_INTEGER_TYPE_REF => Ok(DataType::UInt16), + DEFAULT_TYPE_VARIATION_REF => Ok(DataType::Int16), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(DataType::UInt16), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, r#type::Kind::I32(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(DataType::Int32), - UNSIGNED_INTEGER_TYPE_REF => Ok(DataType::UInt32), + DEFAULT_TYPE_VARIATION_REF => Ok(DataType::Int32), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(DataType::UInt32), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, r#type::Kind::I64(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(DataType::Int64), - UNSIGNED_INTEGER_TYPE_REF => Ok(DataType::UInt64), + DEFAULT_TYPE_VARIATION_REF => Ok(DataType::Int64), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(DataType::UInt64), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), @@ -1160,16 +1163,16 @@ fn from_substrait_type( r#type::Kind::Fp32(_) => Ok(DataType::Float32), r#type::Kind::Fp64(_) => Ok(DataType::Float64), r#type::Kind::Timestamp(ts) => match ts.type_variation_reference { - TIMESTAMP_SECOND_TYPE_REF => { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { Ok(DataType::Timestamp(TimeUnit::Second, None)) } - TIMESTAMP_MILLI_TYPE_REF => { + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { 
Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) } - TIMESTAMP_MICRO_TYPE_REF => { + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) } - TIMESTAMP_NANO_TYPE_REF => { + TIMESTAMP_NANO_TYPE_VARIATION_REF => { Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) } v => not_impl_err!( @@ -1177,15 +1180,15 @@ fn from_substrait_type( ), }, r#type::Kind::Date(date) => match date.type_variation_reference { - DATE_32_TYPE_REF => Ok(DataType::Date32), - DATE_64_TYPE_REF => Ok(DataType::Date64), + DATE_32_TYPE_VARIATION_REF => Ok(DataType::Date32), + DATE_64_TYPE_VARIATION_REF => Ok(DataType::Date64), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, r#type::Kind::Binary(binary) => match binary.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => Ok(DataType::Binary), - LARGE_CONTAINER_TYPE_REF => Ok(DataType::LargeBinary), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::Binary), + LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::LargeBinary), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), @@ -1194,8 +1197,8 @@ fn from_substrait_type( Ok(DataType::FixedSizeBinary(fixed.length)) } r#type::Kind::String(string) => match string.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => Ok(DataType::Utf8), - LARGE_CONTAINER_TYPE_REF => Ok(DataType::LargeUtf8), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::Utf8), + LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::LargeUtf8), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), @@ -1209,18 +1212,18 @@ fn from_substrait_type( is_substrait_type_nullable(inner_type)?, )); match list.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => Ok(DataType::List(field)), - LARGE_CONTAINER_TYPE_REF => Ok(DataType::LargeList(field)), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::List(field)), + LARGE_CONTAINER_TYPE_VARIATION_REF => 
Ok(DataType::LargeList(field)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" )?, } } r#type::Kind::Decimal(d) => match d.type_variation_reference { - DECIMAL_128_TYPE_REF => { + DECIMAL_128_TYPE_VARIATION_REF => { Ok(DataType::Decimal128(d.precision as u8, d.scale as i8)) } - DECIMAL_256_TYPE_REF => { + DECIMAL_256_TYPE_VARIATION_REF => { Ok(DataType::Decimal256(d.precision as u8, d.scale as i8)) } v => not_impl_err!( @@ -1397,29 +1400,29 @@ fn from_substrait_literal( let scalar_value = match &lit.literal_type { Some(LiteralType::Boolean(b)) => ScalarValue::Boolean(Some(*b)), Some(LiteralType::I8(n)) => match lit.type_variation_reference { - DEFAULT_TYPE_REF => ScalarValue::Int8(Some(*n as i8)), - UNSIGNED_INTEGER_TYPE_REF => ScalarValue::UInt8(Some(*n as u8)), + DEFAULT_TYPE_VARIATION_REF => ScalarValue::Int8(Some(*n as i8)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => ScalarValue::UInt8(Some(*n as u8)), others => { return substrait_err!("Unknown type variation reference {others}"); } }, Some(LiteralType::I16(n)) => match lit.type_variation_reference { - DEFAULT_TYPE_REF => ScalarValue::Int16(Some(*n as i16)), - UNSIGNED_INTEGER_TYPE_REF => ScalarValue::UInt16(Some(*n as u16)), + DEFAULT_TYPE_VARIATION_REF => ScalarValue::Int16(Some(*n as i16)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => ScalarValue::UInt16(Some(*n as u16)), others => { return substrait_err!("Unknown type variation reference {others}"); } }, Some(LiteralType::I32(n)) => match lit.type_variation_reference { - DEFAULT_TYPE_REF => ScalarValue::Int32(Some(*n)), - UNSIGNED_INTEGER_TYPE_REF => ScalarValue::UInt32(Some(*n as u32)), + DEFAULT_TYPE_VARIATION_REF => ScalarValue::Int32(Some(*n)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => ScalarValue::UInt32(Some(*n as u32)), others => { return substrait_err!("Unknown type variation reference {others}"); } }, Some(LiteralType::I64(n)) => match lit.type_variation_reference { - DEFAULT_TYPE_REF => ScalarValue::Int64(Some(*n)), - 
UNSIGNED_INTEGER_TYPE_REF => ScalarValue::UInt64(Some(*n as u64)), + DEFAULT_TYPE_VARIATION_REF => ScalarValue::Int64(Some(*n)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => ScalarValue::UInt64(Some(*n as u64)), others => { return substrait_err!("Unknown type variation reference {others}"); } @@ -1427,25 +1430,35 @@ fn from_substrait_literal( Some(LiteralType::Fp32(f)) => ScalarValue::Float32(Some(*f)), Some(LiteralType::Fp64(f)) => ScalarValue::Float64(Some(*f)), Some(LiteralType::Timestamp(t)) => match lit.type_variation_reference { - TIMESTAMP_SECOND_TYPE_REF => ScalarValue::TimestampSecond(Some(*t), None), - TIMESTAMP_MILLI_TYPE_REF => ScalarValue::TimestampMillisecond(Some(*t), None), - TIMESTAMP_MICRO_TYPE_REF => ScalarValue::TimestampMicrosecond(Some(*t), None), - TIMESTAMP_NANO_TYPE_REF => ScalarValue::TimestampNanosecond(Some(*t), None), + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + ScalarValue::TimestampSecond(Some(*t), None) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + ScalarValue::TimestampMillisecond(Some(*t), None) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + ScalarValue::TimestampMicrosecond(Some(*t), None) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + ScalarValue::TimestampNanosecond(Some(*t), None) + } others => { return substrait_err!("Unknown type variation reference {others}"); } }, Some(LiteralType::Date(d)) => ScalarValue::Date32(Some(*d)), Some(LiteralType::String(s)) => match lit.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => ScalarValue::Utf8(Some(s.clone())), - LARGE_CONTAINER_TYPE_REF => ScalarValue::LargeUtf8(Some(s.clone())), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => ScalarValue::Utf8(Some(s.clone())), + LARGE_CONTAINER_TYPE_VARIATION_REF => ScalarValue::LargeUtf8(Some(s.clone())), others => { return substrait_err!("Unknown type variation reference {others}"); } }, Some(LiteralType::Binary(b)) => match lit.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => ScalarValue::Binary(Some(b.clone())), - 
LARGE_CONTAINER_TYPE_REF => ScalarValue::LargeBinary(Some(b.clone())), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => ScalarValue::Binary(Some(b.clone())), + LARGE_CONTAINER_TYPE_VARIATION_REF => { + ScalarValue::LargeBinary(Some(b.clone())) + } others => { return substrait_err!("Unknown type variation reference {others}"); } @@ -1484,11 +1497,10 @@ fn from_substrait_literal( } let element_type = elements[0].data_type(); match lit.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => ScalarValue::List(ScalarValue::new_list( - elements.as_slice(), - &element_type, - )), - LARGE_CONTAINER_TYPE_REF => ScalarValue::LargeList( + DEFAULT_CONTAINER_TYPE_VARIATION_REF => ScalarValue::List( + ScalarValue::new_list(elements.as_slice(), &element_type), + ), + LARGE_CONTAINER_TYPE_VARIATION_REF => ScalarValue::LargeList( ScalarValue::new_large_list(elements.as_slice(), &element_type), ), others => { @@ -1503,10 +1515,10 @@ fn from_substrait_literal( name_idx, )?; match lit.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => { + DEFAULT_CONTAINER_TYPE_VARIATION_REF => { ScalarValue::List(ScalarValue::new_list(&[], &element_type)) } - LARGE_CONTAINER_TYPE_REF => ScalarValue::LargeList( + LARGE_CONTAINER_TYPE_VARIATION_REF => ScalarValue::LargeList( ScalarValue::new_large_list(&[], &element_type), ), others => { @@ -1590,29 +1602,29 @@ fn from_substrait_null( match kind { r#type::Kind::Bool(_) => Ok(ScalarValue::Boolean(None)), r#type::Kind::I8(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(ScalarValue::Int8(None)), - UNSIGNED_INTEGER_TYPE_REF => Ok(ScalarValue::UInt8(None)), + DEFAULT_TYPE_VARIATION_REF => Ok(ScalarValue::Int8(None)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(ScalarValue::UInt8(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), }, r#type::Kind::I16(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(ScalarValue::Int16(None)), - UNSIGNED_INTEGER_TYPE_REF => 
Ok(ScalarValue::UInt16(None)), + DEFAULT_TYPE_VARIATION_REF => Ok(ScalarValue::Int16(None)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(ScalarValue::UInt16(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), }, r#type::Kind::I32(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(ScalarValue::Int32(None)), - UNSIGNED_INTEGER_TYPE_REF => Ok(ScalarValue::UInt32(None)), + DEFAULT_TYPE_VARIATION_REF => Ok(ScalarValue::Int32(None)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(ScalarValue::UInt32(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), }, r#type::Kind::I64(integer) => match integer.type_variation_reference { - DEFAULT_TYPE_REF => Ok(ScalarValue::Int64(None)), - UNSIGNED_INTEGER_TYPE_REF => Ok(ScalarValue::UInt64(None)), + DEFAULT_TYPE_VARIATION_REF => Ok(ScalarValue::Int64(None)), + UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(ScalarValue::UInt64(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), @@ -1620,14 +1632,16 @@ fn from_substrait_null( r#type::Kind::Fp32(_) => Ok(ScalarValue::Float32(None)), r#type::Kind::Fp64(_) => Ok(ScalarValue::Float64(None)), r#type::Kind::Timestamp(ts) => match ts.type_variation_reference { - TIMESTAMP_SECOND_TYPE_REF => Ok(ScalarValue::TimestampSecond(None, None)), - TIMESTAMP_MILLI_TYPE_REF => { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampSecond(None, None)) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { Ok(ScalarValue::TimestampMillisecond(None, None)) } - TIMESTAMP_MICRO_TYPE_REF => { + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { Ok(ScalarValue::TimestampMicrosecond(None, None)) } - TIMESTAMP_NANO_TYPE_REF => { + TIMESTAMP_NANO_TYPE_VARIATION_REF => { Ok(ScalarValue::TimestampNanosecond(None, None)) } v => not_impl_err!( @@ -1635,23 +1649,23 @@ fn from_substrait_null( ), }, r#type::Kind::Date(date) => match date.type_variation_reference { - DATE_32_TYPE_REF => 
Ok(ScalarValue::Date32(None)), - DATE_64_TYPE_REF => Ok(ScalarValue::Date64(None)), + DATE_32_TYPE_VARIATION_REF => Ok(ScalarValue::Date32(None)), + DATE_64_TYPE_VARIATION_REF => Ok(ScalarValue::Date64(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), }, r#type::Kind::Binary(binary) => match binary.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => Ok(ScalarValue::Binary(None)), - LARGE_CONTAINER_TYPE_REF => Ok(ScalarValue::LargeBinary(None)), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(ScalarValue::Binary(None)), + LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(ScalarValue::LargeBinary(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), }, // FixedBinary is not supported because `None` doesn't have length r#type::Kind::String(string) => match string.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => Ok(ScalarValue::Utf8(None)), - LARGE_CONTAINER_TYPE_REF => Ok(ScalarValue::LargeUtf8(None)), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(ScalarValue::Utf8(None)), + LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(ScalarValue::LargeUtf8(None)), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), @@ -1671,12 +1685,12 @@ fn from_substrait_null( true, ); match l.type_variation_reference { - DEFAULT_CONTAINER_TYPE_REF => Ok(ScalarValue::List(Arc::new( - GenericListArray::new_null(field.into(), 1), - ))), - LARGE_CONTAINER_TYPE_REF => Ok(ScalarValue::LargeList(Arc::new( - GenericListArray::new_null(field.into(), 1), - ))), + DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(ScalarValue::List( + Arc::new(GenericListArray::new_null(field.into(), 1)), + )), + LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(ScalarValue::LargeList( + Arc::new(GenericListArray::new_null(field.into(), 1)), + )), v => not_impl_err!( "Unsupported Substrait type variation {v} of type {kind:?}" ), diff --git a/datafusion/substrait/src/logical_plan/producer.rs 
b/datafusion/substrait/src/logical_plan/producer.rs index 010386bf97ceb..0208b010c856b 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -91,13 +91,15 @@ use substrait::{ }; use crate::variation_const::{ - DATE_32_TYPE_REF, DATE_64_TYPE_REF, DECIMAL_128_TYPE_REF, DECIMAL_256_TYPE_REF, - DEFAULT_CONTAINER_TYPE_REF, DEFAULT_TYPE_REF, INTERVAL_DAY_TIME_TYPE_REF, - INTERVAL_DAY_TIME_TYPE_URL, INTERVAL_MONTH_DAY_NANO_TYPE_REF, - INTERVAL_MONTH_DAY_NANO_TYPE_URL, INTERVAL_YEAR_MONTH_TYPE_REF, - INTERVAL_YEAR_MONTH_TYPE_URL, LARGE_CONTAINER_TYPE_REF, TIMESTAMP_MICRO_TYPE_REF, - TIMESTAMP_MILLI_TYPE_REF, TIMESTAMP_NANO_TYPE_REF, TIMESTAMP_SECOND_TYPE_REF, - UNSIGNED_INTEGER_TYPE_REF, + DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, + DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, + DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_DAY_TIME_TYPE_URL, + INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_URL, + INTERVAL_YEAR_MONTH_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_URL, + LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, + TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, + TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; /// Convert DataFusion LogicalPlan to Substrait Plan @@ -626,7 +628,7 @@ fn to_substrait_named_struct(schema: &DFSchemaRef) -> Result { .iter() .map(|f| to_substrait_type(f.data_type(), f.is_nullable())) .collect::>()?, - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability: r#type::Nullability::Unspecified as i32, }; @@ -1430,78 +1432,78 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result internal_err!("Null cast is not valid"), DataType::Boolean => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Bool(r#type::Boolean { - type_variation_reference: 
DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::Int8 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I8(r#type::I8 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::UInt8 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I8(r#type::I8 { - type_variation_reference: UNSIGNED_INTEGER_TYPE_REF, + type_variation_reference: UNSIGNED_INTEGER_TYPE_VARIATION_REF, nullability, })), }), DataType::Int16 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I16(r#type::I16 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::UInt16 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I16(r#type::I16 { - type_variation_reference: UNSIGNED_INTEGER_TYPE_REF, + type_variation_reference: UNSIGNED_INTEGER_TYPE_VARIATION_REF, nullability, })), }), DataType::Int32 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I32(r#type::I32 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::UInt32 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I32(r#type::I32 { - type_variation_reference: UNSIGNED_INTEGER_TYPE_REF, + type_variation_reference: UNSIGNED_INTEGER_TYPE_VARIATION_REF, nullability, })), }), DataType::Int64 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I64(r#type::I64 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::UInt64 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::I64(r#type::I64 { - type_variation_reference: UNSIGNED_INTEGER_TYPE_REF, + type_variation_reference: UNSIGNED_INTEGER_TYPE_VARIATION_REF, nullability, })), }), // Float16 is not supported in Substrait DataType::Float32 => Ok(substrait::proto::Type { kind: 
Some(r#type::Kind::Fp32(r#type::Fp32 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::Float64 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Fp64(r#type::Fp64 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), // Timezone is ignored. DataType::Timestamp(unit, _) => { let type_variation_reference = match unit { - TimeUnit::Second => TIMESTAMP_SECOND_TYPE_REF, - TimeUnit::Millisecond => TIMESTAMP_MILLI_TYPE_REF, - TimeUnit::Microsecond => TIMESTAMP_MICRO_TYPE_REF, - TimeUnit::Nanosecond => TIMESTAMP_NANO_TYPE_REF, + TimeUnit::Second => TIMESTAMP_SECOND_TYPE_VARIATION_REF, + TimeUnit::Millisecond => TIMESTAMP_MILLI_TYPE_VARIATION_REF, + TimeUnit::Microsecond => TIMESTAMP_MICRO_TYPE_VARIATION_REF, + TimeUnit::Nanosecond => TIMESTAMP_NANO_TYPE_VARIATION_REF, }; Ok(substrait::proto::Type { kind: Some(r#type::Kind::Timestamp(r#type::Timestamp { @@ -1512,13 +1514,13 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result Ok(substrait::proto::Type { kind: Some(r#type::Kind::Date(r#type::Date { - type_variation_reference: DATE_32_TYPE_REF, + type_variation_reference: DATE_32_TYPE_VARIATION_REF, nullability, })), }), DataType::Date64 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Date(r#type::Date { - type_variation_reference: DATE_64_TYPE_REF, + type_variation_reference: DATE_64_TYPE_VARIATION_REF, nullability, })), }), @@ -1527,7 +1529,7 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result Result Result Result Ok(substrait::proto::Type { kind: Some(r#type::Kind::Binary(r#type::Binary { - type_variation_reference: DEFAULT_CONTAINER_TYPE_REF, + type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF, nullability, })), }), DataType::FixedSizeBinary(length) => Ok(substrait::proto::Type { kind: Some(r#type::Kind::FixedBinary(r#type::FixedBinary { length: *length, - 
type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, })), }), DataType::LargeBinary => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Binary(r#type::Binary { - type_variation_reference: LARGE_CONTAINER_TYPE_REF, + type_variation_reference: LARGE_CONTAINER_TYPE_VARIATION_REF, nullability, })), }), DataType::Utf8 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::String(r#type::String { - type_variation_reference: DEFAULT_CONTAINER_TYPE_REF, + type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF, nullability, })), }), DataType::LargeUtf8 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::String(r#type::String { - type_variation_reference: LARGE_CONTAINER_TYPE_REF, + type_variation_reference: LARGE_CONTAINER_TYPE_VARIATION_REF, nullability, })), }), @@ -1601,7 +1603,7 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result Result Result Ok(substrait::proto::Type { kind: Some(r#type::Kind::Decimal(r#type::Decimal { - type_variation_reference: DECIMAL_128_TYPE_REF, + type_variation_reference: DECIMAL_128_TYPE_VARIATION_REF, nullability, scale: *s as i32, precision: *p as i32, @@ -1639,7 +1641,7 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result Ok(substrait::proto::Type { kind: Some(r#type::Kind::Decimal(r#type::Decimal { - type_variation_reference: DECIMAL_256_TYPE_REF, + type_variation_reference: DECIMAL_256_TYPE_VARIATION_REF, nullability, scale: *s as i32, precision: *p as i32, @@ -1861,7 +1863,7 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { if value.is_null() { return Ok(Literal { nullable: true, - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, literal_type: Some(LiteralType::Null(to_substrait_type( &value.data_type(), true, @@ -1869,38 +1871,58 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { }); } let (literal_type, type_variation_reference) = match value { - 
ScalarValue::Boolean(Some(b)) => (LiteralType::Boolean(*b), DEFAULT_TYPE_REF), - ScalarValue::Int8(Some(n)) => (LiteralType::I8(*n as i32), DEFAULT_TYPE_REF), - ScalarValue::UInt8(Some(n)) => { - (LiteralType::I8(*n as i32), UNSIGNED_INTEGER_TYPE_REF) + ScalarValue::Boolean(Some(b)) => { + (LiteralType::Boolean(*b), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::Int16(Some(n)) => (LiteralType::I16(*n as i32), DEFAULT_TYPE_REF), - ScalarValue::UInt16(Some(n)) => { - (LiteralType::I16(*n as i32), UNSIGNED_INTEGER_TYPE_REF) + ScalarValue::Int8(Some(n)) => { + (LiteralType::I8(*n as i32), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::Int32(Some(n)) => (LiteralType::I32(*n), DEFAULT_TYPE_REF), - ScalarValue::UInt32(Some(n)) => { - (LiteralType::I32(*n as i32), UNSIGNED_INTEGER_TYPE_REF) - } - ScalarValue::Int64(Some(n)) => (LiteralType::I64(*n), DEFAULT_TYPE_REF), - ScalarValue::UInt64(Some(n)) => { - (LiteralType::I64(*n as i64), UNSIGNED_INTEGER_TYPE_REF) - } - ScalarValue::Float32(Some(f)) => (LiteralType::Fp32(*f), DEFAULT_TYPE_REF), - ScalarValue::Float64(Some(f)) => (LiteralType::Fp64(*f), DEFAULT_TYPE_REF), - ScalarValue::TimestampSecond(Some(t), _) => { - (LiteralType::Timestamp(*t), TIMESTAMP_SECOND_TYPE_REF) + ScalarValue::UInt8(Some(n)) => ( + LiteralType::I8(*n as i32), + UNSIGNED_INTEGER_TYPE_VARIATION_REF, + ), + ScalarValue::Int16(Some(n)) => { + (LiteralType::I16(*n as i32), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::TimestampMillisecond(Some(t), _) => { - (LiteralType::Timestamp(*t), TIMESTAMP_MILLI_TYPE_REF) + ScalarValue::UInt16(Some(n)) => ( + LiteralType::I16(*n as i32), + UNSIGNED_INTEGER_TYPE_VARIATION_REF, + ), + ScalarValue::Int32(Some(n)) => (LiteralType::I32(*n), DEFAULT_TYPE_VARIATION_REF), + ScalarValue::UInt32(Some(n)) => ( + LiteralType::I32(*n as i32), + UNSIGNED_INTEGER_TYPE_VARIATION_REF, + ), + ScalarValue::Int64(Some(n)) => (LiteralType::I64(*n), DEFAULT_TYPE_VARIATION_REF), + ScalarValue::UInt64(Some(n)) => ( + LiteralType::I64(*n as 
i64), + UNSIGNED_INTEGER_TYPE_VARIATION_REF, + ), + ScalarValue::Float32(Some(f)) => { + (LiteralType::Fp32(*f), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::TimestampMicrosecond(Some(t), _) => { - (LiteralType::Timestamp(*t), TIMESTAMP_MICRO_TYPE_REF) + ScalarValue::Float64(Some(f)) => { + (LiteralType::Fp64(*f), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::TimestampNanosecond(Some(t), _) => { - (LiteralType::Timestamp(*t), TIMESTAMP_NANO_TYPE_REF) + ScalarValue::TimestampSecond(Some(t), _) => ( + LiteralType::Timestamp(*t), + TIMESTAMP_SECOND_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampMillisecond(Some(t), _) => ( + LiteralType::Timestamp(*t), + TIMESTAMP_MILLI_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampMicrosecond(Some(t), _) => ( + LiteralType::Timestamp(*t), + TIMESTAMP_MICRO_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampNanosecond(Some(t), _) => ( + LiteralType::Timestamp(*t), + TIMESTAMP_NANO_TYPE_VARIATION_REF, + ), + ScalarValue::Date32(Some(d)) => { + (LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF) } - ScalarValue::Date32(Some(d)) => (LiteralType::Date(*d), DATE_32_TYPE_REF), // Date64 literal is not supported in Substrait ScalarValue::IntervalYearMonth(Some(i)) => { let bytes = i.to_le_bytes(); @@ -1911,7 +1933,7 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { parameter: Some(parameter::Parameter::DataType( substrait::proto::Type { kind: Some(r#type::Kind::I32(r#type::I32 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability: r#type::Nullability::Required as i32, })), }, @@ -1931,7 +1953,7 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { let i64_param = Parameter { parameter: Some(parameter::Parameter::DataType(substrait::proto::Type { kind: Some(r#type::Kind::I64(r#type::I64 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability: r#type::Nullability::Required as i32, })), })), @@ -1957,7 
+1979,7 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { parameter: Some(parameter::Parameter::DataType( substrait::proto::Type { kind: Some(r#type::Kind::I64(r#type::I64 { - type_variation_reference: DEFAULT_TYPE_REF, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability: r#type::Nullability::Required as i32, })), }, @@ -1971,36 +1993,42 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { INTERVAL_DAY_TIME_TYPE_REF, ) } - ScalarValue::Binary(Some(b)) => { - (LiteralType::Binary(b.clone()), DEFAULT_CONTAINER_TYPE_REF) - } - ScalarValue::LargeBinary(Some(b)) => { - (LiteralType::Binary(b.clone()), LARGE_CONTAINER_TYPE_REF) - } - ScalarValue::FixedSizeBinary(_, Some(b)) => { - (LiteralType::FixedBinary(b.clone()), DEFAULT_TYPE_REF) - } - ScalarValue::Utf8(Some(s)) => { - (LiteralType::String(s.clone()), DEFAULT_CONTAINER_TYPE_REF) - } - ScalarValue::LargeUtf8(Some(s)) => { - (LiteralType::String(s.clone()), LARGE_CONTAINER_TYPE_REF) - } + ScalarValue::Binary(Some(b)) => ( + LiteralType::Binary(b.clone()), + DEFAULT_CONTAINER_TYPE_VARIATION_REF, + ), + ScalarValue::LargeBinary(Some(b)) => ( + LiteralType::Binary(b.clone()), + LARGE_CONTAINER_TYPE_VARIATION_REF, + ), + ScalarValue::FixedSizeBinary(_, Some(b)) => ( + LiteralType::FixedBinary(b.clone()), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::Utf8(Some(s)) => ( + LiteralType::String(s.clone()), + DEFAULT_CONTAINER_TYPE_VARIATION_REF, + ), + ScalarValue::LargeUtf8(Some(s)) => ( + LiteralType::String(s.clone()), + LARGE_CONTAINER_TYPE_VARIATION_REF, + ), ScalarValue::Decimal128(v, p, s) if v.is_some() => ( LiteralType::Decimal(Decimal { value: v.unwrap().to_le_bytes().to_vec(), precision: *p as i32, scale: *s as i32, }), - DECIMAL_128_TYPE_REF, + DECIMAL_128_TYPE_VARIATION_REF, ), ScalarValue::List(l) => ( convert_array_to_literal_list(l)?, - DEFAULT_CONTAINER_TYPE_REF, + DEFAULT_CONTAINER_TYPE_VARIATION_REF, + ), + ScalarValue::LargeList(l) => ( + 
convert_array_to_literal_list(l)?, + LARGE_CONTAINER_TYPE_VARIATION_REF, ), - ScalarValue::LargeList(l) => { - (convert_array_to_literal_list(l)?, LARGE_CONTAINER_TYPE_REF) - } ScalarValue::Struct(s) => ( LiteralType::Struct(Struct { fields: s @@ -2011,11 +2039,11 @@ fn to_substrait_literal(value: &ScalarValue) -> Result { }) .collect::>>()?, }), - DEFAULT_TYPE_REF, + DEFAULT_TYPE_VARIATION_REF, ), _ => ( not_impl_err!("Unsupported literal: {value:?}")?, - DEFAULT_TYPE_REF, + DEFAULT_TYPE_VARIATION_REF, ), }; diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index 51c0d3b0211e0..27f4b3ea228a6 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -18,28 +18,35 @@ //! Type variation constants //! //! To add support for types not in the [core specification](https://substrait.io/types/type_classes/), -//! we make use of the [simple extensions](https://substrait.io/extensions/#simple-extensions) of substrait -//! type. This module contains the constants used to identify the type variation. +//! we make use of the [simple extensions] of substrait type. This module contains the constants used +//! to identify the type variation. //! //! The rules of type variations here are: //! - Default type reference is 0. It is used when the actual type is the same with the original type. //! - Extended variant type references start from 1, and ususlly increase by 1. +//! +//! Definitions here are not the final form. All the non-system-preferred variations will be defined +//! using [simple extensions] as per the [spec of type_variations](https://substrait.io/types/type_variations/) +//! +//! 
[simple extensions]: https://substrait.io/extensions/#simple-extensions -// For type variations -pub const DEFAULT_TYPE_REF: u32 = 0; -pub const UNSIGNED_INTEGER_TYPE_REF: u32 = 1; -pub const TIMESTAMP_SECOND_TYPE_REF: u32 = 0; -pub const TIMESTAMP_MILLI_TYPE_REF: u32 = 1; -pub const TIMESTAMP_MICRO_TYPE_REF: u32 = 2; -pub const TIMESTAMP_NANO_TYPE_REF: u32 = 3; -pub const DATE_32_TYPE_REF: u32 = 0; -pub const DATE_64_TYPE_REF: u32 = 1; -pub const DEFAULT_CONTAINER_TYPE_REF: u32 = 0; -pub const LARGE_CONTAINER_TYPE_REF: u32 = 1; -pub const DECIMAL_128_TYPE_REF: u32 = 0; -pub const DECIMAL_256_TYPE_REF: u32 = 1; +// For [type variations](https://substrait.io/types/type_variations/#type-variations) in substrait. +// Type variations are used to represent different types based on one type class. +/// The "system-preferred" variation (i.e., no variation). +pub const DEFAULT_TYPE_VARIATION_REF: u32 = 0; +pub const UNSIGNED_INTEGER_TYPE_VARIATION_REF: u32 = 1; +pub const TIMESTAMP_SECOND_TYPE_VARIATION_REF: u32 = 0; +pub const TIMESTAMP_MILLI_TYPE_VARIATION_REF: u32 = 1; +pub const TIMESTAMP_MICRO_TYPE_VARIATION_REF: u32 = 2; +pub const TIMESTAMP_NANO_TYPE_VARIATION_REF: u32 = 3; +pub const DATE_32_TYPE_VARIATION_REF: u32 = 0; +pub const DATE_64_TYPE_VARIATION_REF: u32 = 1; +pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0; +pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1; +pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0; +pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1; -// For custom types +// For [user-defined types](https://substrait.io/types/type_classes/#user-defined-types). /// For [`DataType::Interval`] with [`IntervalUnit::YearMonth`]. /// /// An `i32` for elapsed whole months. See also [`ScalarValue::IntervalYearMonth`]