Skip to content

Commit

Permalink
minor: inconsistent group by position planning (apache#10679)
Browse files Browse the repository at this point in the history
* fix: inconsistent group by position planning

* Update datafusion/sql/src/utils.rs

Co-authored-by: Andrew Lamb <[email protected]>

* improved error message

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
2 people authored and findepi committed Jul 16, 2024
1 parent 2ebb30b commit 36d1111
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 31 deletions.
7 changes: 3 additions & 4 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
//
// SELECT c1, MAX(c2) AS m FROM t GROUP BY c1 HAVING MAX(c2) > 10;
//
let having_expr = resolve_aliases_to_exprs(&having_expr, &alias_map)?;
let having_expr = resolve_aliases_to_exprs(having_expr, &alias_map)?;
normalize_col(having_expr, &projected_plan)
})
.transpose()?;
Expand Down Expand Up @@ -163,10 +163,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
alias_map.remove(f.name());
}
let group_by_expr =
resolve_aliases_to_exprs(&group_by_expr, &alias_map)?;
resolve_aliases_to_exprs(group_by_expr, &alias_map)?;
let group_by_expr =
resolve_positions_to_exprs(&group_by_expr, &select_exprs)
.unwrap_or(group_by_expr);
resolve_positions_to_exprs(group_by_expr, &select_exprs)?;
let group_by_expr = normalize_col(group_by_expr, &projected_plan)?;
self.validate_schema_satisfies_exprs(
base_plan.schema(),
Expand Down
75 changes: 54 additions & 21 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,47 +149,51 @@ pub(crate) fn extract_aliases(exprs: &[Expr]) -> HashMap<String, Expr> {
}

/// Given an expression that's literal int encoding position, lookup the corresponding expression
/// in the select_exprs list, if the index is within the bounds and it is indeed a position literal;
/// Otherwise, return None
/// in the select_exprs list, if the index is within the bounds and it is indeed a position literal,
/// otherwise, returns planning error.
/// If input expression is not an int literal, returns expression as-is.
pub(crate) fn resolve_positions_to_exprs(
expr: &Expr,
expr: Expr,
select_exprs: &[Expr],
) -> Option<Expr> {
) -> Result<Expr> {
match expr {
// sql_expr_to_logical_expr maps number to i64
// https://github.com/apache/datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887
Expr::Literal(ScalarValue::Int64(Some(position)))
if position > &0_i64 && position <= &(select_exprs.len() as i64) =>
if position > 0_i64 && position <= select_exprs.len() as i64 =>
{
let index = (position - 1) as usize;
let select_expr = &select_exprs[index];
Some(match select_expr {
Ok(match select_expr {
Expr::Alias(Alias { expr, .. }) => *expr.clone(),
_ => select_expr.clone(),
})
}
_ => None,
Expr::Literal(ScalarValue::Int64(Some(position))) => plan_err!(
"Cannot find column with position {} in SELECT clause. Valid columns: 1 to {}",
position, select_exprs.len()
),
_ => Ok(expr),
}
}

/// Rebuilds an `Expr` with columns that refer to aliases replaced by the
/// alias' underlying `Expr`.
pub(crate) fn resolve_aliases_to_exprs(
expr: &Expr,
expr: Expr,
aliases: &HashMap<String, Expr>,
) -> Result<Expr> {
expr.clone()
.transform_up(|nested_expr| match nested_expr {
Expr::Column(c) if c.relation.is_none() => {
if let Some(aliased_expr) = aliases.get(&c.name) {
Ok(Transformed::yes(aliased_expr.clone()))
} else {
Ok(Transformed::no(Expr::Column(c)))
}
expr.transform_up(|nested_expr| match nested_expr {
Expr::Column(c) if c.relation.is_none() => {
if let Some(aliased_expr) = aliases.get(&c.name) {
Ok(Transformed::yes(aliased_expr.clone()))
} else {
Ok(Transformed::no(Expr::Column(c)))
}
_ => Ok(Transformed::no(nested_expr)),
})
.data()
}
_ => Ok(Transformed::no(nested_expr)),
})
.data()
}

/// given a slice of window expressions sharing the same sort key, find their common partition
Expand Down Expand Up @@ -346,9 +350,9 @@ mod tests {
use arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
use arrow_schema::Fields;
use datafusion_common::{DFSchema, Result};
use datafusion_expr::{col, lit, unnest, EmptyRelation, LogicalPlan};
use datafusion_expr::{col, count, lit, unnest, EmptyRelation, LogicalPlan};

use crate::utils::recursive_transform_unnest;
use crate::utils::{recursive_transform_unnest, resolve_positions_to_exprs};

#[test]
fn test_recursive_transform_unnest() -> Result<()> {
Expand Down Expand Up @@ -437,4 +441,33 @@ mod tests {

Ok(())
}

#[test]
fn test_resolve_positions_to_exprs() -> Result<()> {
let select_exprs = vec![col("c1"), col("c2"), count(lit(1))];

// Assert 1 resolved as first column in select list
let resolved = resolve_positions_to_exprs(lit(1i64), &select_exprs)?;
assert_eq!(resolved, col("c1"));

// Assert error if index out of select clause bounds
let resolved = resolve_positions_to_exprs(lit(-1i64), &select_exprs);
assert!(resolved.is_err_and(|e| e.message().contains(
"Cannot find column with position -1 in SELECT clause. Valid columns: 1 to 3"
)));

let resolved = resolve_positions_to_exprs(lit(5i64), &select_exprs);
assert!(resolved.is_err_and(|e| e.message().contains(
"Cannot find column with position 5 in SELECT clause. Valid columns: 1 to 3"
)));

// Assert expression returned as-is
let resolved = resolve_positions_to_exprs(lit("text"), &select_exprs)?;
assert_eq!(resolved, lit("text"));

let resolved = resolve_positions_to_exprs(col("fake"), &select_exprs)?;
assert_eq!(resolved, col("fake"));

Ok(())
}
}
12 changes: 6 additions & 6 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1484,16 +1484,16 @@ fn select_simple_aggregate_with_groupby_position_out_of_range() {
let sql = "SELECT state, MIN(age) FROM person GROUP BY 0";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
"Error during planning: Projection references non-aggregate values: Expression person.state could not be resolved from available columns: Int64(0), MIN(person.age)",
err.strip_backtrace()
);
"Error during planning: Cannot find column with position 0 in SELECT clause. Valid columns: 1 to 2",
err.strip_backtrace()
);

let sql2 = "SELECT state, MIN(age) FROM person GROUP BY 5";
let err2 = logical_plan(sql2).expect_err("query should have failed");
assert_eq!(
"Error during planning: Projection references non-aggregate values: Expression person.state could not be resolved from available columns: Int64(5), MIN(person.age)",
err2.strip_backtrace()
);
"Error during planning: Cannot find column with position 5 in SELECT clause. Valid columns: 1 to 2",
err2.strip_backtrace()
);
}

#[test]
Expand Down
5 changes: 5 additions & 0 deletions datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5059,3 +5059,8 @@ query II
SELECT a + 1 AS d, a + 1 + b AS c FROM (SELECT 1 AS a, 2 AS b) GROUP BY a + 1, a + 1 + b;
----
2 4

statement error DataFusion error: Error during planning: Cannot find column with position 4 in SELECT clause. Valid columns: 1 to 3
SELECT a, b, COUNT(1)
FROM multiple_ordered_table
GROUP BY 1, 2, 4, 5, 6;

0 comments on commit 36d1111

Please sign in to comment.