From e93793e57199463ef73480dc4dad1664af239127 Mon Sep 17 00:00:00 2001 From: comphead Date: Mon, 29 Apr 2024 16:56:14 -0700 Subject: [PATCH 1/4] Fix: Sort Merge Join crashes on TPCH Q21 --- datafusion/physical-plan/src/joins/sort_merge_join.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 143a726d31b1..d8cde0a3edc1 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -1416,6 +1416,9 @@ fn get_filter_column( .map(|i| buffered_columns[i.index].clone()) .collect::<Vec<_>>(); +// dbg!(&left_columns); +// dbg!(&right_columns); + filter_columns.extend(left_columns); filter_columns.extend(right_columns); } From 4d20995459b8d6d231f10469b1dfe7e77bc5b0fa Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 30 May 2024 08:42:29 -0700 Subject: [PATCH 2/4] Fix LeftAnti SMJ join when the join filter is set --- datafusion/physical-plan/src/joins/sort_merge_join.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index d8cde0a3edc1..72e23e4d7b77 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -1416,8 +1416,8 @@ fn get_filter_column( .map(|i| buffered_columns[i.index].clone()) .collect::<Vec<_>>(); -// dbg!(&left_columns); -// dbg!(&right_columns); + // dbg!(&left_columns); + // dbg!(&right_columns); filter_columns.extend(left_columns); filter_columns.extend(right_columns); From 591594c0ac56c77097e92f57f32b2a07d2955750 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 31 May 2024 08:35:50 -0700 Subject: [PATCH 3/4] rm dbg --- datafusion/physical-plan/src/joins/sort_merge_join.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs 
b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 72e23e4d7b77..143a726d31b1 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -1416,9 +1416,6 @@ fn get_filter_column( .map(|i| buffered_columns[i.index].clone()) .collect::<Vec<_>>(); - // dbg!(&left_columns); - // dbg!(&right_columns); - filter_columns.extend(left_columns); filter_columns.extend(right_columns); } From 8353d20b56bbabdaab8104f3cc2ba022421385c4 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 31 May 2024 15:11:03 -0700 Subject: [PATCH 4/4] Add SMJ to TPCH benchmark usage --- benchmarks/bench.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index 49e65eafac9a..87d0720ccb63 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -66,9 +66,11 @@ compare: Compares results from benchmark runs * Benchmarks ********** all(default): Data/Run/Compare for all benchmarks -tpch: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table +tpch: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table, hash join +tpch_smj: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table, sort merge join tpch_mem: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), query from memory -tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table +tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table, hash join +tpch_smj10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table, sort merge join tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory parquet: Benchmark of parquet reader's filtering speed sort: Benchmark of sorting speed