diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs index f87f5fdea99f..103599564252 100644 --- a/datafusion/optimizer/src/optimize_projections.rs +++ b/datafusion/optimizer/src/optimize_projections.rs @@ -218,6 +218,22 @@ fn optimize_projections( // Only use the absolutely necessary aggregate expressions required // by the parent: let mut new_aggr_expr = get_at_indices(&aggregate.aggr_expr, &aggregate_reqs); + + // Aggregations always need at least one aggregate expression. + // With a nested count, we don't require any column as input, but + // still need to create a correct aggregate, which may be optimized + // out later. As an example, consider the following query: + // + // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...]) + // + // which always returns 1. + if new_aggr_expr.is_empty() + && new_group_bys.is_empty() + && !aggregate.aggr_expr.is_empty() + { + new_aggr_expr = vec![aggregate.aggr_expr[0].clone()]; + } + let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter()); let schema = aggregate.input.schema(); let necessary_indices = indices_referred_by_exprs(schema, all_exprs_iter)?; @@ -238,21 +254,6 @@ fn optimize_projections( let (aggregate_input, _) = add_projection_on_top_if_helpful(aggregate_input, necessary_exprs)?; - // Aggregations always need at least one aggregate expression. - // With a nested count, we don't require any column as input, but - // still need to create a correct aggregate, which may be optimized - // out later. As an example, consider the following query: - // - // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...]) - // - // which always returns 1. - if new_aggr_expr.is_empty() - && new_group_bys.is_empty() - && !aggregate.aggr_expr.is_empty() - { - new_aggr_expr = vec![aggregate.aggr_expr[0].clone()]; - } - // Create a new aggregate plan with the updated input and only the // absolutely necessary fields: return Aggregate::try_new( diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 5216b14cb2d2..50c62eff7772 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1527,3 +1527,23 @@ SELECT to_timestamp('I AM NOT A TIMESTAMP'); query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type SELECT CAST('' AS int); + +# See issue: https://github.com/apache/arrow-datafusion/issues/8978 +statement ok +create table users (id int, name varchar); + +statement ok +insert into users values (1, 'Tom'); + +statement ok +create view v as select count(id) from users; + +query I +select * from v; +---- +1 + +query I +select count(1) from v; +---- +1