diff --git a/README.md b/README.md index c193309..33d8ab8 100644 --- a/README.md +++ b/README.md @@ -145,26 +145,6 @@ export TPCH_DATA_PATH=`pwd`/data cargo test ``` -Tests compare plans with expected plans, which unfortunately contain the -path to the parquet tables. The path committed under version control is -the one of a Github Runner, and won't work locally. You can fix it by -running the following command: - -```bash -./scripts/replace-expected-plan-paths.sh local-dev -```` - -When instead you need to regenerate the plans, which you can do by -re-running the planner tests removing all the content of -`testdata/expected-plans`, they will now contain your local paths. You can -fix it before committing the plans running - -```bash - -./scripts/replace-expected-plan-paths.sh pre-ci - -``` - ## Benchmarking Create a release build when running benchmarks, then use pip to install the wheel. diff --git a/src/planner.rs b/src/planner.rs index c9cab3b..7a3824d 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -394,8 +394,8 @@ mod test { let file = format!("testdata/queries/q{n}.sql"); let sql = fs::read_to_string(&file)?; - let config = SessionConfig::new().with_target_partitions(1); - let ctx = SessionContext::with_config(config); + let config = SessionConfig::new().with_target_partitions(2); + let ctx = SessionContext::new_with_config(config); let tables = &[ "customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier", ]; @@ -423,7 +423,7 @@ mod test { displayable(plan.as_ref()).indent(false) )); - output.push_str("RaySQL Plan\n===========\n\n"); + output.push_str("DataFusion Ray Distributed Plan\n===========\n\n"); let graph = make_execution_graph(plan, false)?; for id in 0..=graph.get_final_query_stage().id { let query_stage = graph.query_stages.get(&id).unwrap(); diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index 37939e7..3b1f94a 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -11,23 +11,38 @@ Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] +SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] + SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] +Query Stage #0 (2 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 2)) + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 2)) + SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 2)) + +Query Stage #2 (2 -> 1): +SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 2)) diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index 2b608fe..23f582b 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -23,49 +23,108 @@ Limit: skip=0, fetch=20 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +GlobalLimitExec: skip=0, fetch=20 + SortPreservingMergeExec: [revenue@2 DESC], fetch=20 + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@3], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@7], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 2), input_partitions=2 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 2)) + AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 2)) + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 2)) + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 2)) + +Query Stage #8 (1 -> 1): +GlobalLimitExec: skip=0, fetch=20 + SortPreservingMergeExec: [revenue@2 DESC], fetch=20 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 2)) diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index 945d66b..58319a0 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -30,65 +30,148 @@ Sort: value DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[value@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +SortPreservingMergeExec: [value@1 DESC] + SortExec: expr=[value@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 2), input_partitions=2 + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={ ... }, projection=[ps_suppkey, ps_availqty, ps_supplycost] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] - -RaySQL Plan + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@3], 2), input_partitions=2 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[value@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={ ... }, projection=[ps_suppkey, ps_availqty, ps_supplycost] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + +Query Stage #1 (1 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[ps_suppkey, ps_availqty, ps_supplycost] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2)) + +Query Stage #4 (2 -> 1): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([], 2)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + +Query Stage #5 (1 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + +Query Stage #6 (1 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2)) + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + +Query Stage #8 (2 -> 2): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2)) + +Query Stage #9 (2 -> 2): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2)) + +Query Stage #10 (2 -> 2): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + SortExec: expr=[value@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([], 2)) + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + +Query Stage #11 (2 -> 1): +SortPreservingMergeExec: [value@1 DESC] + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index 6733b94..428dd9f 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -17,33 +17,62 @@ Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={ ... }, projection=[c_custkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% - -RaySQL Plan +SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] + SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_count@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 2), input_partitions=2 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={ ... }, projection=[c_custkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% +Query Stage #0 (2 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[c_custkey] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 2)) + SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 2)) + +Query Stage #4 (2 -> 1): +SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 2)) diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index ba862ce..383958c 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -14,28 +14,51 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] - -RaySQL Plan + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2)) + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + +Query Stage #2 (2 -> 1): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 2)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2)) + +Query Stage #3 (1 -> 1): +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 2)) diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index 5ee76e6..89f1ec7 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -20,38 +20,70 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - ProjectionExec: expr=[p_partkey@0 as p_partkey] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity, l_extendedprice] + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity, l_extendedprice] - ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity] + RepartitionExec: partitioning=Hash([l_partkey@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity, l_extendedprice] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity] + +Query Stage #3 (2 -> 1): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 2)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity, l_extendedprice] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2)) ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_quantity] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2)) + +Query Stage #4 (1 -> 1): +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([], 2)) diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index 421e7ce..8547084 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -21,41 +21,95 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] - AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name] - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] + RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@2], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] - AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] +Query Stage #0 (2 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 2)) + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_name] - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_quantity] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 2)) + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 2)) + +Query Stage #7 (1 -> 1): +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 2)) diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index 94a341d..7e8f91b 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -41,93 +41,227 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + RepartitionExec: partitioning=Hash([n_regionkey@9], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@4], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@2], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -RaySQL Plan + RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 2), input_partitions=2 + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@2], 2), input_partitions=2 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 2), input_partitions=2 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + +Query Stage #1 (1 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] + +Query Stage #2 (1 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + +Query Stage #3 (1 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 2)) + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 2)) + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 2)) + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 2)) + +Query Stage #8 (2 -> 2): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 2)) + +Query Stage #9 (1 -> 2): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + +Query Stage #10 (1 -> 2): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] + +Query Stage #11 (1 -> 2): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + +Query Stage #12 (2 -> 2): +ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2)) + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +Query Stage #13 (2 -> 2): +ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2)) + +Query Stage #14 (2 -> 2): +ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 2)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + +Query Stage #15 (2 -> 2): +ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 2)) + +Query Stage #16 (2 -> 2): +ShuffleWriterExec(stage_id=16, output_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 2)) + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + +Query Stage #17 (2 -> 2): +ShuffleWriterExec(stage_id=17, output_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 2)) + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=16, input_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 2)) + +Query Stage #18 (1 -> 1): +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 + ShuffleReaderExec(stage_id=17, input_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 2)) diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index 7de9a36..2d5f70f 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -29,59 +29,126 @@ Sort: supplier.s_name ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - ProjectionExec: expr=[p_partkey@0 as p_partkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty] - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + RepartitionExec: partitioning=Hash([s_nationkey@3], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + +Query Stage #1 (1 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_address, s_nationkey] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2)) + +Query Stage #3 (1 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty] + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2)) + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 2)) + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={ ... }, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 2)) + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_availqty] - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={ ... }, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 2)) + +Query Stage #8 (2 -> 1): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([], 2)) + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2)) + +Query Stage #9 (2 -> 1): +SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([], 2)) diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index c482eec..de59694 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -37,71 +37,153 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + RepartitionExec: partitioning=Hash([s_name@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_nationkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + RepartitionExec: partitioning=Hash([l_orderkey@1], 2), input_partitions=2 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@1], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@2], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + +Query Stage #2 (1 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_nationkey] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_name, s_nationkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2)) + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 2)) + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 2)) + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey] + +Query Stage #8 (2 -> 2): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +Query Stage #9 (2 -> 2): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #10 (2 -> 2): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 2)) + SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 2)) + +Query Stage #11 (1 -> 1): +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 2)) diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index dc4ddb7..4b451e9 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -22,41 +22,78 @@ Sort: custsale.cntrycode ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={ ... }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={ ... }]) - ParquetExec: file_groups={ ... }, projection=[o_custkey] - -RaySQL Plan +SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([cntrycode@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={ ... }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={ ... }]) + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[o_custkey] + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={ ... }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={ ... }]) - ParquetExec: file_groups={ ... }, projection=[o_custkey] +Query Stage #0 (2 -> 1): +ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(2)) + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={ ... }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={ ... }]) + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[o_custkey] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 2)) + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2)) + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2)) + +Query Stage #5 (2 -> 1): +SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2)) diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index 789ad07..7bb5c68 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -21,45 +21,90 @@ Limit: skip=0, fetch=10 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] +GlobalLimitExec: skip=0, fetch=10 + SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 + SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - ProjectionExec: expr=[c_custkey@0 as c_custkey] + RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (2 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - ProjectionExec: expr=[c_custkey@0 as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 2)) + AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 2)) + SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 2)) + +Query Stage #6 (1 -> 1): +GlobalLimitExec: skip=0, fetch=10 + SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 2)) diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index 754d7a7..3ba554e 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -17,35 +17,64 @@ Sort: orders.o_orderpriority ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -RaySQL Plan +SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderpriority@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 +Query Stage #0 (2 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 2)) + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 2)) + +Query Stage #4 (2 -> 1): +SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 2)) diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index 828217f..4306de6 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -28,63 +28,150 @@ Sort: revenue DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] +SortPreservingMergeExec: [revenue@1 DESC] + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_name@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + RepartitionExec: partitioning=Hash([r_regionkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] - -RaySQL Plan + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@3], 2), input_partitions=2 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 2), input_partitions=2 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@1], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 2), input_partitions=2 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + +Query Stage #1 (1 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] + +Query Stage #2 (1 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2)) + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #8 (2 -> 2): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 2)) + +Query Stage #9 (2 -> 2): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2)) + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + +Query Stage #10 (2 -> 2): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2)) + +Query Stage #11 (2 -> 2): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 2)) + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 2)) + +Query Stage #12 (2 -> 1): +SortPreservingMergeExec: [revenue@1 DESC] + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 2)) diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 85ecc41..37544c1 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -11,20 +11,28 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] +Query Stage #0 (2 -> 1): +ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(2)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 ParquetExec: file_groups={ ... }, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] +Query Stage #1 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(2)) + diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index 4905507..f25755b 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -38,87 +38,203 @@ Sort: all_nations.o_year ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] +SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + RepartitionExec: partitioning=Hash([o_year@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + RepartitionExec: partitioning=Hash([r_regionkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@3], 2), input_partitions=2 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 2), input_partitions=2 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] - -RaySQL Plan + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@4], 2), input_partitions=2 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@3], 2), input_partitions=2 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@1], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + +Query Stage #1 (1 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] + +Query Stage #2 (1 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] + +Query Stage #3 (2 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] + +Query Stage #4 (2 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + +Query Stage #5 (1 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + +Query Stage #6 (1 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2)) + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + +Query Stage #8 (2 -> 2): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2)) + +Query Stage #9 (2 -> 2): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={ ... }, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={ ... }, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2)) + +Query Stage #10 (2 -> 2): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 2)) + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #11 (2 -> 2): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 2)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 2)) + +Query Stage #12 (2 -> 2): +ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 2)) + +Query Stage #13 (2 -> 2): +ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2)) + +Query Stage #14 (2 -> 2): +ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 2)) + AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2)) + +Query Stage #15 (2 -> 2): +ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 2)) + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 2)) + +Query Stage #16 (2 -> 1): +SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 2)) diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index b75bc06..20cfd92 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -27,61 +27,148 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] +SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] + SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + RepartitionExec: partitioning=Hash([nation@0, o_year@1], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@3], 2), input_partitions=2 + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - ProjectionExec: expr=[p_partkey@0 as p_partkey] + RepartitionExec: partitioning=Hash([l_orderkey@0], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 2), input_partitions=2 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 2), input_partitions=2 + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@2], 2), input_partitions=2 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 2), input_partitions=2 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@1], 2), input_partitions=2 + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] -RaySQL Plan +DataFusion Ray Distributed Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] +Query Stage #0 (1 -> 2): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] + +Query Stage #1 (2 -> 2): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate] + +Query Stage #2 (2 -> 2): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +Query Stage #3 (1 -> 2): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] + +Query Stage #4 (1 -> 2): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + +Query Stage #5 (2 -> 2): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2)) + ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + +Query Stage #6 (2 -> 2): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2)) + +Query Stage #7 (2 -> 2): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 2)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={ ... }, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={ ... }, projection=[o_orderkey, o_orderdate] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={ ... }, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={ ... }, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={ ... }, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={ ... }, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 2)) + +Query Stage #8 (2 -> 2): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 2)) + +Query Stage #9 (2 -> 2): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2)) + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2)) + +Query Stage #10 (2 -> 2): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 2)) + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2)) + +Query Stage #11 (2 -> 2): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 2)) + SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 2)) + +Query Stage #12 (2 -> 1): +SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 2))