Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove datafusion dependency from 'parser' crate #2904

Merged
merged 7 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ object_store_util = { path = "../object_store_util" }
pgrepr = { path = "../pgrepr" }
pgsrv = { path = "../pgsrv" }
protogen = { path = "../protogen" }
parser = { path = "../parser" }
proxyutil = { path = "../proxyutil" }
rpcsrv = { path = "../rpcsrv" }
slt = { path = "../slt" }
Expand Down
6 changes: 3 additions & 3 deletions crates/cli/src/highlighter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::io::{self};

use datafusion::sql::sqlparser::dialect::GenericDialect;
use datafusion::sql::sqlparser::keywords::Keyword;
use datafusion::sql::sqlparser::tokenizer::{Token, Tokenizer};
use nu_ansi_term::{Color, Style};
use parser::sqlparser::dialect::GenericDialect;
use parser::sqlparser::keywords::Keyword;
use parser::sqlparser::tokenizer::{Token, Tokenizer};
use reedline::{Highlighter, Hinter, SearchQuery, StyledText, ValidationResult, Validator};
use sqlbuiltins::functions::DEFAULT_BUILTIN_FUNCTIONS;

Expand Down
1 change: 1 addition & 0 deletions crates/datafusion_ext/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ telemetry = { path = "../telemetry" }
catalog = { path = "../catalog" }
decimal = { path = "../decimal" }
protogen = { path = "../protogen" }
parser = { path = "../parser" }
pgrepr = { path = "../pgrepr" }
serde_json = { workspace = true }
datafusion = { workspace = true }
Expand Down
179 changes: 179 additions & 0 deletions crates/datafusion_ext/src/conversion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
//! Conversion functions for converting between SQL AST types and DataFusion types
//! These are needed to insulate us from datafusion's reexport of sqlparser::ast.
//! Most of the types are the exact same as the ones in datafusion, but on a different version.
use datafusion::common::plan_err;
use datafusion::error::{DataFusionError, Result};
use datafusion::logical_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits};
use datafusion::scalar::ScalarValue;
use parser::options::{CompressionTypeVariant, FileType};
use parser::sqlparser::ast;

#[repr(transparent)]
/// transparent wrapper around AST types to implement conversions to datafusion types
///
/// This allows us to be fully decoupled from datafusion's AST types _(reexported from sqlparser::ast)_
pub struct AstWrap<T>(pub(crate) T);

/// Convert a wrapped type to the target type
/// NOTE: `impl From<T>` can't be used directly because we can't implement `From` for types we don't own.
/// The input types are from the `parser` crate, and most of the target types are from the `datafusion` crate.
/// So this just provides a utility function to convert between the two.
/// It is the equivalent of `AstWrap(value).into()`, but provides a more readable API.
pub fn convert<F, I>(value: F) -> I
where
AstWrap<F>: Into<I>,
{
AstWrap(value).into()
}

/// Try to convert a wrapped type to the target type
/// NOTE: `impl TryInto<T>` can't be used directly because we can't implement `TryInto` for types we don't own.
/// The input types are from the `parser` crate, and most of the target types are from the `datafusion` crate.
/// So this just provides a utility function to convert between the two.
/// It is the equivalent of `AstWrap(value).try_into()`, but provides a more readable API.
pub fn try_convert<F, I>(value: F) -> std::result::Result<I, <AstWrap<F> as TryInto<I>>::Error>
where
AstWrap<F>: TryInto<I>,
{
AstWrap(value).try_into()
}

impl From<AstWrap<ast::Ident>> for datafusion::sql::sqlparser::ast::Ident {
fn from(value: AstWrap<ast::Ident>) -> Self {
datafusion::sql::sqlparser::ast::Ident {
value: value.0.value,
quote_style: value.0.quote_style,
}
}
}

impl<T> From<AstWrap<Option<T>>> for Option<T>
where
T: From<AstWrap<T>>,
{
fn from(value: AstWrap<Option<T>>) -> Self {
value.0
}
}

impl From<AstWrap<ast::IdentWithAlias>> for datafusion::sql::sqlparser::ast::IdentWithAlias {
fn from(value: AstWrap<ast::IdentWithAlias>) -> Self {
datafusion::sql::sqlparser::ast::IdentWithAlias {
ident: convert(value.0.ident),
alias: convert(value.0.alias),
}
}
}
impl From<AstWrap<Vec<ast::Ident>>> for Vec<datafusion::sql::sqlparser::ast::Ident> {
fn from(value: AstWrap<Vec<ast::Ident>>) -> Self {
value.0.into_iter().map(convert).collect()
}
}
impl From<AstWrap<ast::ObjectName>> for datafusion::sql::sqlparser::ast::ObjectName {
fn from(value: AstWrap<ast::ObjectName>) -> Self {
datafusion::sql::sqlparser::ast::ObjectName(value.0 .0.into_iter().map(convert).collect())
}
}
impl From<AstWrap<ast::WindowFrameUnits>> for WindowFrameUnits {
fn from(value: AstWrap<ast::WindowFrameUnits>) -> Self {
match value.0 {
ast::WindowFrameUnits::Range => Self::Range,
ast::WindowFrameUnits::Groups => Self::Groups,
ast::WindowFrameUnits::Rows => Self::Rows,
}
}
}

impl TryFrom<AstWrap<ast::WindowFrameBound>> for WindowFrameBound {
type Error = DataFusionError;
fn try_from(value: AstWrap<ast::WindowFrameBound>) -> Result<Self> {
Ok(match value.0 {
ast::WindowFrameBound::Preceding(Some(v)) => {
Self::Preceding(convert_frame_bound_to_scalar_value(*v)?)
}
ast::WindowFrameBound::Preceding(None) => Self::Preceding(ScalarValue::Null),
ast::WindowFrameBound::Following(Some(v)) => {
Self::Following(convert_frame_bound_to_scalar_value(*v)?)
}
ast::WindowFrameBound::Following(None) => Self::Following(ScalarValue::Null),
ast::WindowFrameBound::CurrentRow => Self::CurrentRow,
})
}
}

impl TryFrom<AstWrap<ast::WindowFrame>> for WindowFrame {
type Error = DataFusionError;

fn try_from(value: AstWrap<ast::WindowFrame>) -> Result<Self> {
let start_bound = AstWrap(value.0.start_bound).try_into()?;
let end_bound = match value.0.end_bound {
Some(value) => AstWrap(value).try_into()?,
None => WindowFrameBound::CurrentRow,
};

if let WindowFrameBound::Following(val) = &start_bound {
if val.is_null() {
plan_err!("Invalid window frame: start bound cannot be UNBOUNDED FOLLOWING")?
}
} else if let WindowFrameBound::Preceding(val) = &end_bound {
if val.is_null() {
plan_err!("Invalid window frame: end bound cannot be UNBOUNDED PRECEDING")?
}
};
let units = AstWrap(value.0.units).into();
Ok(Self::new_bounds(units, start_bound, end_bound))
}
}

pub fn convert_frame_bound_to_scalar_value(v: ast::Expr) -> Result<ScalarValue> {
Ok(ScalarValue::Utf8(Some(match v {
ast::Expr::Value(ast::Value::Number(value, false))
| ast::Expr::Value(ast::Value::SingleQuotedString(value)) => value,
ast::Expr::Interval(ast::Interval {
value,
leading_field,
..
}) => {
let result = match *value {
ast::Expr::Value(ast::Value::SingleQuotedString(item)) => item,
e => {
return Err(datafusion::common::sql_datafusion_err!(
datafusion::sql::sqlparser::parser::ParserError::ParserError(format!(
"INTERVAL expression cannot be {e:?}"
))
));
}
};
if let Some(leading_field) = leading_field {
format!("{result} {leading_field}")
} else {
result
}
}
_ => plan_err!("Invalid window frame: frame offsets must be non negative integers")?,
})))
}

impl From<AstWrap<FileType>> for datafusion::common::FileType {
fn from(value: AstWrap<FileType>) -> Self {
match value.0 {
FileType::ARROW => Self::ARROW,
FileType::AVRO => Self::AVRO,
FileType::PARQUET => Self::PARQUET,
FileType::CSV => Self::CSV,
FileType::JSON => Self::JSON,
}
}
}

impl From<AstWrap<CompressionTypeVariant>> for datafusion::common::parsers::CompressionTypeVariant {
fn from(value: AstWrap<CompressionTypeVariant>) -> Self {
match value.0 {
CompressionTypeVariant::GZIP => Self::GZIP,
CompressionTypeVariant::BZIP2 => Self::BZIP2,
CompressionTypeVariant::XZ => Self::XZ,
CompressionTypeVariant::ZSTD => Self::ZSTD,
CompressionTypeVariant::UNCOMPRESSED => Self::UNCOMPRESSED,
}
}
}
1 change: 1 addition & 0 deletions crates/datafusion_ext/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ pub mod runtime;
pub mod session_metrics;
pub mod vars;
pub use planner::*;
pub mod conversion;
pub mod functions;
pub mod transform;
2 changes: 1 addition & 1 deletion crates/datafusion_ext/src/planner/expr/binary_op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use datafusion::common::{DataFusionError, Result};
use datafusion::logical_expr::Operator;
use datafusion::sql::sqlparser::ast::BinaryOperator;
use parser::sqlparser::ast::BinaryOperator;

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down
7 changes: 5 additions & 2 deletions crates/datafusion_ext/src/planner/expr/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ use datafusion::logical_expr::{
WindowFunctionDefinition,
};
use datafusion::sql::planner::PlannerContext;
use datafusion::sql::sqlparser::ast::{
use parser::sqlparser::ast::{
Expr as SQLExpr,
Function as SQLFunction,
FunctionArg,
Expand All @@ -47,6 +47,7 @@ use datafusion::sql::sqlparser::ast::{
};

use super::arrow_cast::ARROW_CAST_NAME;
use crate::conversion::try_convert;
use crate::planner::expr::arrow_cast::create_arrow_cast;
use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down Expand Up @@ -149,7 +150,7 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> {
.window_frame
.as_ref()
.map(|window_frame| {
let window_frame = window_frame.clone().try_into()?;
let window_frame = try_convert(window_frame.clone())?;
check_window_frame(&window_frame, order_by.len()).map(|_| window_frame)
})
.transpose()?;
Expand Down Expand Up @@ -270,13 +271,15 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> {
FunctionArg::Named {
name: _,
arg: FunctionArgExpr::Expr(arg),
operator: _,
} => {
self.sql_expr_to_logical_expr(arg, schema, planner_context)
.await
}
FunctionArg::Named {
name: _,
arg: FunctionArgExpr::Wildcard,
operator: _,
} => Ok(Expr::Wildcard { qualifier: None }),
FunctionArg::Unnamed(FunctionArgExpr::Expr(arg)) => {
self.sql_expr_to_logical_expr(arg, schema, planner_context)
Expand Down
2 changes: 1 addition & 1 deletion crates/datafusion_ext/src/planner/expr/grouping_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use datafusion::common::{DFSchema, DataFusionError, Result};
use datafusion::logical_expr::{Expr, GroupingSet};
use datafusion::sql::planner::PlannerContext;
use datafusion::sql::sqlparser::ast::Expr as SQLExpr;
use parser::sqlparser::ast::Expr as SQLExpr;

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down
2 changes: 1 addition & 1 deletion crates/datafusion_ext/src/planner/expr/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use datafusion::common::{Column, DFField, DFSchema, DataFusionError, Result, Tab
use datafusion::logical_expr::{Case, Expr};
use datafusion::physical_plan::internal_err;
use datafusion::sql::planner::PlannerContext;
use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, Ident};
use parser::sqlparser::ast::{Expr as SQLExpr, Ident};

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down
2 changes: 1 addition & 1 deletion crates/datafusion_ext/src/planner/expr/json_access.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use datafusion::common::{not_impl_err, DataFusionError, Result};
use datafusion::logical_expr::Operator;
use datafusion::sql::sqlparser::ast::JsonOperator;
use parser::sqlparser::ast::JsonOperator;

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down
6 changes: 3 additions & 3 deletions crates/datafusion_ext/src/planner/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ use datafusion::logical_expr::{
TryCast,
};
use datafusion::sql::planner::PlannerContext;
use datafusion::sql::sqlparser::ast::{
use datafusion::sql::sqlparser::parser::ParserError::ParserError;
use parser::sqlparser::ast::{
ArrayAgg,
Expr as SQLExpr,
Interval,
JsonOperator,
TrimWhereField,
Value,
};
use datafusion::sql::sqlparser::parser::ParserError::ParserError;

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down Expand Up @@ -229,7 +229,7 @@ impl<'a, S: AsyncContextProvider> SqlQueryPlanner<'a, S> {
if let SQLExpr::Identifier(id) = *column {
self.plan_indexed(
col(self.normalizer.normalize(id)),
keys,
keys.into_iter().map(|k| k.key).collect(),
schema,
planner_context,
)
Expand Down
2 changes: 1 addition & 1 deletion crates/datafusion_ext/src/planner/expr/order_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use datafusion::common::{plan_datafusion_err, plan_err, DFSchema, DataFusionErro
use datafusion::logical_expr::expr::Sort;
use datafusion::logical_expr::Expr;
use datafusion::sql::planner::PlannerContext;
use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value};
use parser::sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value};

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down
2 changes: 1 addition & 1 deletion crates/datafusion_ext/src/planner/expr/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use datafusion::common::{DFSchema, Result};
use datafusion::logical_expr::expr::{Exists, InSubquery};
use datafusion::logical_expr::{Expr, Subquery};
use datafusion::sql::planner::PlannerContext;
use datafusion::sql::sqlparser::ast::{Expr as SQLExpr, Query};
use parser::sqlparser::ast::{Expr as SQLExpr, Query};

use crate::planner::{AsyncContextProvider, SqlQueryPlanner};

Expand Down
Loading