Skip to content

Commit 819d357

Browse files
Add support for functional dependency for ROW_NUMBER window function. (#8737)
* Add primary key support for row_number window function
* Add comments, minor changes
* Add new test
* Review

---------

Co-authored-by: Mehmet Ozan Kabak <[email protected]>
1 parent e6b9f52 commit 819d357

File tree

2 files changed

+91
-8
lines changed

2 files changed

+91
-8
lines changed

datafusion/expr/src/logical_plan/plan.rs

+52-7
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ use std::sync::Arc;
2525
use super::dml::CopyTo;
2626
use super::DdlStatement;
2727
use crate::dml::CopyOptions;
28-
use crate::expr::{Alias, Exists, InSubquery, Placeholder, Sort as SortExpr};
28+
use crate::expr::{
29+
Alias, Exists, InSubquery, Placeholder, Sort as SortExpr, WindowFunction,
30+
};
2931
use crate::expr_rewriter::{create_col_from_scalar_expr, normalize_cols};
3032
use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor};
3133
use crate::logical_plan::extension::UserDefinedLogicalNode;
@@ -36,9 +38,9 @@ use crate::utils::{
3638
split_conjunction,
3739
};
3840
use crate::{
39-
build_join_schema, expr_vec_fmt, BinaryExpr, CreateMemoryTable, CreateView, Expr,
40-
ExprSchemable, LogicalPlanBuilder, Operator, TableProviderFilterPushDown,
41-
TableSource,
41+
build_join_schema, expr_vec_fmt, BinaryExpr, BuiltInWindowFunction,
42+
CreateMemoryTable, CreateView, Expr, ExprSchemable, LogicalPlanBuilder, Operator,
43+
TableProviderFilterPushDown, TableSource, WindowFunctionDefinition,
4244
};
4345

4446
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
@@ -48,9 +50,10 @@ use datafusion_common::tree_node::{
4850
};
4951
use datafusion_common::{
5052
aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints,
51-
DFField, DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependencies,
52-
OwnedTableReference, ParamValues, Result, UnnestOptions,
53+
DFField, DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence,
54+
FunctionalDependencies, OwnedTableReference, ParamValues, Result, UnnestOptions,
5355
};
56+
5457
// backwards compatibility
5558
pub use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan};
5659
pub use datafusion_common::{JoinConstraint, JoinType};
@@ -1967,7 +1970,9 @@ pub struct Window {
19671970
impl Window {
19681971
/// Create a new window operator.
19691972
pub fn try_new(window_expr: Vec<Expr>, input: Arc<LogicalPlan>) -> Result<Self> {
1970-
let mut window_fields: Vec<DFField> = input.schema().fields().clone();
1973+
let fields = input.schema().fields();
1974+
let input_len = fields.len();
1975+
let mut window_fields = fields.clone();
19711976
window_fields.extend_from_slice(&exprlist_to_fields(window_expr.iter(), &input)?);
19721977
let metadata = input.schema().metadata().clone();
19731978

@@ -1976,6 +1981,46 @@ impl Window {
19761981
input.schema().functional_dependencies().clone();
19771982
window_func_dependencies.extend_target_indices(window_fields.len());
19781983

1984+
// Since we know that ROW_NUMBER outputs will be unique (i.e. they consist
1985+
// of consecutive numbers per partition), we can represent this fact with
1986+
// functional dependencies.
1987+
let mut new_dependencies = window_expr
1988+
.iter()
1989+
.enumerate()
1990+
.filter_map(|(idx, expr)| {
1991+
if let Expr::WindowFunction(WindowFunction {
1992+
// Function is ROW_NUMBER
1993+
fun:
1994+
WindowFunctionDefinition::BuiltInWindowFunction(
1995+
BuiltInWindowFunction::RowNumber,
1996+
),
1997+
partition_by,
1998+
..
1999+
}) = expr
2000+
{
2001+
// When there is no PARTITION BY, row number will be unique
2002+
// across the entire table.
2003+
if partition_by.is_empty() {
2004+
return Some(idx + input_len);
2005+
}
2006+
}
2007+
None
2008+
})
2009+
.map(|idx| {
2010+
FunctionalDependence::new(vec![idx], vec![], false)
2011+
.with_mode(Dependency::Single)
2012+
})
2013+
.collect::<Vec<_>>();
2014+
2015+
if !new_dependencies.is_empty() {
2016+
for dependence in new_dependencies.iter_mut() {
2017+
dependence.target_indices = (0..window_fields.len()).collect();
2018+
}
2019+
// Merge the dependencies introduced by the ROW_NUMBER window function into the existing functional dependencies.
2020+
let new_deps = FunctionalDependencies::new(new_dependencies);
2021+
window_func_dependencies.extend(new_deps);
2022+
}
2023+
19792024
Ok(Window {
19802025
input,
19812026
window_expr,

datafusion/sqllogictest/test_files/window.slt

+39-1
Original file line numberDiff line numberDiff line change
@@ -3832,4 +3832,42 @@ select row_number() over (partition by 1 order by 1) rn,
38323832
from (select 1 a union all select 2 a) x;
38333833
----
38343834
1 1 1 1 1 1
3835-
2 1 1 2 2 1
3835+
2 1 1 2 2 1
3836+
3837+
# When the PARTITION BY clause is empty, the ROW_NUMBER result is unique across the whole input.
3838+
query TII
3839+
SELECT *
3840+
FROM (SELECT c1, c2, ROW_NUMBER() OVER() as rn
3841+
FROM aggregate_test_100
3842+
LIMIT 5)
3843+
GROUP BY rn
3844+
ORDER BY rn;
3845+
----
3846+
c 2 1
3847+
d 5 2
3848+
b 1 3
3849+
a 1 4
3850+
b 5 5
3851+
3852+
# When the PARTITION BY expression is a constant, the ROW_NUMBER result is unique across the whole input.
3853+
query TII
3854+
SELECT *
3855+
FROM (SELECT c1, c2, ROW_NUMBER() OVER(PARTITION BY 3) as rn
3856+
FROM aggregate_test_100
3857+
LIMIT 5)
3858+
GROUP BY rn
3859+
ORDER BY rn;
3860+
----
3861+
c 2 1
3862+
d 5 2
3863+
b 1 3
3864+
a 1 4
3865+
b 5 5
3866+
3867+
statement error DataFusion error: Error during planning: Projection references non-aggregate values: Expression aggregate_test_100.c1 could not be resolved from available columns: rn
3868+
SELECT *
3869+
FROM (SELECT c1, c2, ROW_NUMBER() OVER(PARTITION BY c1) as rn
3870+
FROM aggregate_test_100
3871+
LIMIT 5)
3872+
GROUP BY rn
3873+
ORDER BY rn;

0 commit comments

Comments
 (0)