From 5d7b9635d53107b1ae2b571efce8017e821b1a4e Mon Sep 17 00:00:00 2001 From: Kaifei Yi Date: Wed, 20 Nov 2024 08:50:03 +0800 Subject: [PATCH] [GLUTEN-7751][VL] Merge two consecutive aggregates to one in complete mode (#7752) What changes were proposed in this pull request? Fix #7751. In issue #4668, support for the CH backend's merge agg capability was added. We can also support this feature in the velox backend. How was this patch tested? Through existing unit tests and GA verification, and additional new unit tests have been added. --- .../backendsapi/velox/VeloxRuleApi.scala | 1 + .../HashAggregateExecTransformer.scala | 14 +- .../tpch-approved-plan/v1-bhj/spark33/15.txt | 74 ++-- .../tpch-approved-plan/v1-bhj/spark34/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark32/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark32/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark32/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark32/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark32/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark32/20.txt | 390 +++++++++--------- .../tpch-approved-plan/v1/spark32/3.txt | 180 ++++---- .../tpch-approved-plan/v1/spark33/11.txt | 152 ++++--- .../tpch-approved-plan/v1/spark33/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark33/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark33/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark33/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark33/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark33/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark33/20.txt | 382 +++++++++-------- .../tpch-approved-plan/v1/spark33/3.txt | 180 ++++---- .../tpch-approved-plan/v1/spark34/11.txt | 152 ++++--- .../tpch-approved-plan/v1/spark34/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark34/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark34/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark34/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark34/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark34/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark34/20.txt | 382 +++++++++-------- .../tpch-approved-plan/v1/spark34/3.txt | 180 ++++---- .../VeloxAggregateFunctionsSuite.scala | 1 + docs/Configuration.md | 1 + .../MergeTwoPhasesHashBaseAggregate.scala | 5 +- .../GlutenReplaceHashWithSortAggSuite.scala | 9 +- .../GlutenReplaceHashWithSortAggSuite.scala | 2 +- .../GlutenReplaceHashWithSortAggSuite.scala | 37 +- ...MergeTwoPhasesHashBaseAggregateSuite.scala | 203 +++++++++ .../org/apache/gluten/GlutenConfig.scala | 9 + 37 files changed, 2559 insertions(+), 2561 deletions(-) rename {backends-clickhouse/src/main/scala/org/apache/gluten/extension => gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar}/MergeTwoPhasesHashBaseAggregate.scala (96%) create mode 100644 gluten-ut/test/src/test/scala/org/apache/gluten/execution/MergeTwoPhasesHashBaseAggregateSuite.scala diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala index 62e3be67ca6b..67f9d21a5e7f 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala @@ -68,6 +68,7 @@ object VeloxRuleApi { injector.injectPreTransform(_ => PushDownInputFileExpression.PreOffload) injector.injectPreTransform(c => FallbackOnANSIMode.apply(c.session)) injector.injectPreTransform(c => FallbackMultiCodegens.apply(c.session)) + injector.injectPreTransform(c => MergeTwoPhasesHashBaseAggregate(c.session)) injector.injectPreTransform(_ => RewriteSubqueryBroadcast()) injector.injectPreTransform(c => BloomFilterMightContainJointRewriteRule.apply(c.session)) injector.injectPreTransform(c => ArrowScanReplaceRule.apply(c.session)) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala index 61af545e411d..9a6ba3a220f1 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala @@ -214,7 +214,7 @@ abstract class HashAggregateExecTransformer( VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction) ) aggregateNodeList.add(aggFunctionNode) - case Final => + case Final | Complete => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, @@ -242,7 +242,7 @@ abstract class HashAggregateExecTransformer( aggregateFunction.inputAggBufferAttributes.head.nullable) ) aggregateNodeList.add(partialNode) - case Final => + case Final | Complete => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, @@ -275,7 +275,7 @@ abstract class HashAggregateExecTransformer( expression.mode match { case Partial | PartialMerge => typeNodeList.add(VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction)) - case Final => + case Final | Complete => typeNodeList.add( ConverterUtils .getTypeNode(aggregateFunction.dataType, aggregateFunction.nullable)) @@ -356,7 +356,7 @@ abstract class HashAggregateExecTransformer( // The process of handling the inconsistency in column types and order between // Spark and Velox is exactly the opposite of applyExtractStruct. aggregateExpression.mode match { - case PartialMerge | Final => + case PartialMerge | Final | Complete => val newInputAttributes = new ArrayBuffer[Attribute]() val childNodes = new JArrayList[ExpressionNode]() val (sparkOrders, sparkTypes) = @@ -467,7 +467,7 @@ abstract class HashAggregateExecTransformer( // by previous projection. childrenNodes.add(ExpressionBuilder.makeSelection(colIdx)) colIdx += 1 - case Partial => + case Partial | Complete => aggFunc.children.foreach { _ => childrenNodes.add(ExpressionBuilder.makeSelection(colIdx)) @@ -600,7 +600,7 @@ abstract class HashAggregateExecTransformer( } val aggregateFunc = aggExpr.aggregateFunction val childrenNodes = aggExpr.mode match { - case Partial => + case Partial | Complete => aggregateFunc.children.toList.map( expr => { ExpressionConverter @@ -784,7 +784,7 @@ case class HashAggregateExecPullOutHelper( expr.mode match { case Partial | PartialMerge => expr.aggregateFunction.aggBufferAttributes - case Final => + case Final | Complete => Seq(aggregateAttributes(index)) case other => throw new GlutenNotSupportException(s"Unsupported aggregate mode: $other.") diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt index 74246ef67225..52a87f43535d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt @@ -237,31 +237,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRow (60) - +- ^ RegularHashAggregateExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ ProjectExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ InputIteratorTransformer (54) - +- ShuffleQueryStage (52), Statistics(X) - +- ColumnarExchange (51) - +- VeloxResizeBatches (50) - +- ^ ProjectExecTransformer (48) - +- ^ FlushableHashAggregateExecTransformer (47) - +- ^ ProjectExecTransformer (46) - +- ^ FilterExecTransformer (45) - +- ^ ScanTransformer parquet (44) + VeloxColumnarToRow (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ ProjectExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ InputIteratorTransformer (54) + +- ShuffleQueryStage (52), Statistics(X) + +- ColumnarExchange (51) + +- VeloxResizeBatches (50) + +- ^ ProjectExecTransformer (48) + +- ^ FlushableHashAggregateExecTransformer (47) + +- ^ ProjectExecTransformer (46) + +- ^ FilterExecTransformer (45) + +- ^ ScanTransformer parquet (44) +- == Initial Plan == - HashAggregate (68) - +- HashAggregate (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- Project (63) - +- Filter (62) - +- Scan parquet (61) + HashAggregate (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) (44) ScanTransformer parquet @@ -326,71 +325,64 @@ Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedpri (57) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(58) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(59) WholeStageCodegenTransformer (X) +(58) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(60) VeloxColumnarToRow +(59) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(61) Scan parquet +(60) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(62) Filter +(61) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(63) Project +(62) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(64) HashAggregate +(63) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(65) Exchange +(64) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(67) HashAggregate +(66) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(68) HashAggregate +(67) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(69) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt index a65fc78a2f03..48b06986ee1b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt @@ -239,31 +239,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRow (60) - +- ^ RegularHashAggregateExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ ProjectExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ InputIteratorTransformer (54) - +- ShuffleQueryStage (52), Statistics(X) - +- ColumnarExchange (51) - +- VeloxResizeBatches (50) - +- ^ ProjectExecTransformer (48) - +- ^ FlushableHashAggregateExecTransformer (47) - +- ^ ProjectExecTransformer (46) - +- ^ FilterExecTransformer (45) - +- ^ ScanTransformer parquet (44) + VeloxColumnarToRow (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ ProjectExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ InputIteratorTransformer (54) + +- ShuffleQueryStage (52), Statistics(X) + +- ColumnarExchange (51) + +- VeloxResizeBatches (50) + +- ^ ProjectExecTransformer (48) + +- ^ FlushableHashAggregateExecTransformer (47) + +- ^ ProjectExecTransformer (46) + +- ^ FilterExecTransformer (45) + +- ^ ScanTransformer parquet (44) +- == Initial Plan == - HashAggregate (68) - +- HashAggregate (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- Project (63) - +- Filter (62) - +- Scan parquet (61) + HashAggregate (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) (44) ScanTransformer parquet @@ -328,71 +327,64 @@ Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] (57) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(58) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(59) WholeStageCodegenTransformer (X) +(58) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(60) VeloxColumnarToRow +(59) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(61) Scan parquet +(60) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(62) Filter +(61) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(63) Project +(62) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(64) HashAggregate +(63) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(65) Exchange +(64) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(67) HashAggregate +(66) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(68) HashAggregate +(67) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(69) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt index 91444cca0528..c8ddb5437009 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ ScanTransformer parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ ScanTransformer parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) ScanTransformer parquet @@ -133,180 +132,173 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt index 33c0dfb71e1e..40a891738143 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) ScanTransformer parquet @@ -123,93 +122,86 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt index 94e892d1e8c9..30fd4b0b1124 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ ScanTransformer parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ ScanTransformer parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) ScanTransformer parquet @@ -213,139 +212,132 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt index 550cd076c1c2..dd4cad538e2c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ ScanTransformer parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ ScanTransformer parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ ScanTransformer parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ ScanTransformer parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ ScanTransformer parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ScanTransformer parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) ScanTransformer parquet @@ -371,219 +370,212 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt index f7778691955a..507b700e37e9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) ScanTransformer parquet @@ -122,89 +121,82 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt index 5dd6bbfccab6..13c10a7bba29 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt @@ -1,20 +1,20 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (145) +- == Final Plan == - VeloxColumnarToRow (96) - +- ^ SortExecTransformer (94) - +- ^ InputIteratorTransformer (93) - +- ShuffleQueryStage (91) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (95) + +- ^ SortExecTransformer (93) + +- ^ InputIteratorTransformer (92) + +- ShuffleQueryStage (90) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7) : : +- ColumnarExchange (6) @@ -22,12 +22,12 @@ AdaptiveSparkPlan (146) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33) : : +- ColumnarExchange (32) @@ -48,83 +48,82 @@ AdaptiveSparkPlan (146) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ ScanTransformer parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ ScanTransformer parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ ScanTransformer parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ ScanTransformer parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ ScanTransformer parquet (76) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- Project (143) - +- SortMergeJoin Inner (142) - :- Sort (136) - : +- Exchange (135) - : +- Project (134) - : +- SortMergeJoin LeftSemi (133) - : :- Sort (100) - : : +- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Sort (132) - : +- Exchange (131) - : +- Project (130) - : +- SortMergeJoin Inner (129) - : :- Sort (112) - : : +- Exchange (111) - : : +- SortMergeJoin LeftSemi (110) - : : :- Sort (104) - : : : +- Exchange (103) - : : : +- Filter (102) - : : : +- Scan parquet (101) - : : +- Sort (109) - : : +- Exchange (108) - : : +- Project (107) - : : +- Filter (106) - : : +- Scan parquet (105) - : +- Sort (128) - : +- Exchange (127) - : +- Filter (126) - : +- HashAggregate (125) - : +- HashAggregate (124) - : +- SortMergeJoin LeftSemi (123) - : :- Sort (117) - : : +- Exchange (116) - : : +- Project (115) - : : +- Filter (114) - : : +- Scan parquet (113) - : +- Sort (122) - : +- Exchange (121) - : +- Project (120) - : +- Filter (119) - : +- Scan parquet (118) - +- Sort (141) - +- Exchange (140) - +- Project (139) - +- Filter (138) - +- Scan parquet (137) + Sort (144) + +- Exchange (143) + +- Project (142) + +- SortMergeJoin Inner (141) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- SortMergeJoin LeftSemi (132) + : :- Sort (99) + : : +- Exchange (98) + : : +- Filter (97) + : : +- Scan parquet (96) + : +- Sort (131) + : +- Exchange (130) + : +- Project (129) + : +- SortMergeJoin Inner (128) + : :- Sort (111) + : : +- Exchange (110) + : : +- SortMergeJoin LeftSemi (109) + : : :- Sort (103) + : : : +- Exchange (102) + : : : +- Filter (101) + : : : +- Scan parquet (100) + : : +- Sort (108) + : : +- Exchange (107) + : : +- Project (106) + : : +- Filter (105) + : : +- Scan parquet (104) + : +- Sort (127) + : +- Exchange (126) + : +- Filter (125) + : +- HashAggregate (124) + : +- HashAggregate (123) + : +- SortMergeJoin LeftSemi (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- Filter (113) + : : +- Scan parquet (112) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (140) + +- Exchange (139) + +- Project (138) + +- Filter (137) + +- Scan parquet (136) (1) ScanTransformer parquet @@ -327,417 +326,410 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) ScanTransformer parquet +(76) ScanTransformer parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) InputAdapter +(91) InputAdapter Input [2]: [s_name#X, s_address#X] -(93) InputIteratorTransformer +(92) InputIteratorTransformer Input [2]: [s_name#X, s_address#X] -(94) SortExecTransformer +(93) SortExecTransformer Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(95) WholeStageCodegenTransformer (X) +(94) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(96) VeloxColumnarToRow +(95) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(97) Scan parquet +(96) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(98) Filter +(97) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(99) Exchange +(98) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Sort +(99) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(101) Scan parquet +(100) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(102) Filter +(101) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(103) Exchange +(102) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) Sort +(103) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(105) Scan parquet +(104) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(106) Filter +(105) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(107) Project +(106) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(108) Exchange +(107) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(110) SortMergeJoin +(109) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(111) Exchange +(110) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Sort +(111) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(113) Scan parquet +(112) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(114) Filter +(113) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(115) Project +(114) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(116) Exchange +(115) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Sort +(116) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(118) Scan parquet +(117) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(119) Filter +(118) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(120) Project +(119) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(121) Exchange +(120) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(124) HashAggregate +(123) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(125) HashAggregate +(124) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(126) Filter +(125) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(127) Exchange +(126) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Sort +(127) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(129) SortMergeJoin +(128) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(130) Project +(129) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(131) Exchange +(130) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(131) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(133) SortMergeJoin +(132) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(134) Project +(133) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(135) Exchange +(134) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) Sort +(135) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(137) Scan parquet +(136) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(138) Filter +(137) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(139) Project +(138) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(140) Exchange +(139) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(141) Sort +(140) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(142) SortMergeJoin +(141) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(143) Project +(142) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(144) Exchange +(143) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(144) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(146) AdaptiveSparkPlan +(145) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt index c4df05de1618..2e427b6bf3ca 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ ScanTransformer parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ ScanTransformer parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) ScanTransformer parquet @@ -215,141 +214,134 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt index cc1f4d31b697..55d544b898c6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt @@ -424,55 +424,54 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (135) +- == Final Plan == - VeloxColumnarToRow (114) - +- ^ ProjectExecTransformer (112) - +- ^ RegularHashAggregateExecTransformer (111) - +- ^ RegularHashAggregateExecTransformer (110) - +- ^ ProjectExecTransformer (109) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) - :- ^ InputIteratorTransformer (103) - : +- ShuffleQueryStage (101), Statistics(X) - : +- ColumnarExchange (100) - : +- VeloxResizeBatches (99) - : +- ^ ProjectExecTransformer (97) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - : :- ^ InputIteratorTransformer (91) - : : +- ShuffleQueryStage (89), Statistics(X) - : : +- ColumnarExchange (88) - : : +- VeloxResizeBatches (87) - : : +- ^ ProjectExecTransformer (85) - : : +- ^ FilterExecTransformer (84) - : : +- ^ ScanTransformer parquet (83) - : +- ^ InputIteratorTransformer (95) - : +- ShuffleQueryStage (93), Statistics(X) - : +- ReusedExchange (92) - +- ^ InputIteratorTransformer (107) - +- ShuffleQueryStage (105), Statistics(X) - +- ReusedExchange (104) + VeloxColumnarToRow (113) + +- ^ ProjectExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxResizeBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxResizeBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ ScanTransformer parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (135) - +- HashAggregate (134) - +- Project (133) - +- SortMergeJoin Inner (132) - :- Sort (126) - : +- Exchange (125) - : +- Project (124) - : +- SortMergeJoin Inner (123) - : :- Sort (118) - : : +- Exchange (117) - : : +- Filter (116) - : : +- Scan parquet (115) - : +- Sort (122) - : +- Exchange (121) - : +- Filter (120) - : +- Scan parquet (119) - +- Sort (131) - +- Exchange (130) - +- Project (129) - +- Filter (128) - +- Scan parquet (127) + HashAggregate (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (117) + : : +- Exchange (116) + : : +- Filter (115) + : : +- Scan parquet (114) + : +- Sort (121) + : +- Exchange (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (83) ScanTransformer parquet @@ -581,129 +580,122 @@ Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] (110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(111) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(112) ProjectExecTransformer +(111) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(113) WholeStageCodegenTransformer (X) +(112) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(114) VeloxColumnarToRow +(113) VeloxColumnarToRow Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(115) Scan parquet +(114) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(116) Filter +(115) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(117) Exchange +(116) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Sort +(117) Sort Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(119) Scan parquet +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(120) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(121) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(124) Project +(123) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(125) Exchange +(124) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) Sort +(125) Sort Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(127) Scan parquet +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(128) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(129) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(130) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) Sort +(130) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(132) SortMergeJoin +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(132) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(134) HashAggregate +(133) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(135) HashAggregate +(134) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(136) AdaptiveSparkPlan +(135) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt index bd84f11c6ac5..7c3509c3e726 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35), Statistics(X) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6), Statistics(X) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15), Statistics(X) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ ScanTransformer parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34), Statistics(X) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ ScanTransformer parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) ScanTransformer parquet @@ -133,180 +132,173 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt index 5fdc418d02ef..1161b32937bc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) ScanTransformer parquet @@ -123,93 +122,86 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt index 2d327719c3bd..398cc1f5b8b9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt @@ -257,31 +257,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (73) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRow (64) - +- ^ RegularHashAggregateExecTransformer (62) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ ProjectExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ InputIteratorTransformer (58) - +- ShuffleQueryStage (56), Statistics(X) - +- ColumnarExchange (55) - +- VeloxResizeBatches (54) - +- ^ ProjectExecTransformer (52) - +- ^ FlushableHashAggregateExecTransformer (51) - +- ^ ProjectExecTransformer (50) - +- ^ FilterExecTransformer (49) - +- ^ ScanTransformer parquet (48) + VeloxColumnarToRow (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxResizeBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ ScanTransformer parquet (48) +- == Initial Plan == - HashAggregate (72) - +- HashAggregate (71) - +- HashAggregate (70) - +- Exchange (69) - +- HashAggregate (68) - +- Project (67) - +- Filter (66) - +- Scan parquet (65) + HashAggregate (71) + +- HashAggregate (70) + +- HashAggregate (69) + +- Exchange (68) + +- HashAggregate (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (48) ScanTransformer parquet @@ -346,71 +345,64 @@ Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedpri (61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(62) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(64) VeloxColumnarToRow +(63) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(65) Scan parquet +(64) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(66) Filter +(65) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(67) Project +(66) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(68) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(69) Exchange +(68) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) HashAggregate +(69) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(71) HashAggregate +(70) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(72) HashAggregate +(71) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(73) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt index c5033fa773a5..843ce9b39eb6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ ScanTransformer parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ ScanTransformer parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) ScanTransformer parquet @@ -213,139 +212,132 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt index b07b35495922..1119ddcf80be 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44), Statistics(X) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36), Statistics(X) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16), Statistics(X) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ ScanTransformer parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25), Statistics(X) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ ScanTransformer parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53), Statistics(X) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ ScanTransformer parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57), Statistics(X) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44), Statistics(X) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16), Statistics(X) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ ScanTransformer parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25), Statistics(X) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ ScanTransformer parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53), Statistics(X) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ScanTransformer parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57), Statistics(X) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) ScanTransformer parquet @@ -371,219 +370,212 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt index 348cc4ce85d5..31341db339e1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) ScanTransformer parquet @@ -122,89 +121,82 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt index 703c4a99d63c..9a2c56040b38 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt @@ -1,19 +1,19 @@ == Physical Plan == -AdaptiveSparkPlan (143) +AdaptiveSparkPlan (142) +- == Final Plan == - VeloxColumnarToRow (93) - +- AQEShuffleRead (92) - +- ShuffleQueryStage (91), Statistics(X) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74), Statistics(X) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (92) + +- AQEShuffleRead (91) + +- ShuffleQueryStage (90), Statistics(X) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73), Statistics(X) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7), Statistics(X) : : +- ColumnarExchange (6) @@ -21,12 +21,12 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66), Statistics(X) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65), Statistics(X) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33), Statistics(X) : : +- ColumnarExchange (32) @@ -47,83 +47,82 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ ScanTransformer parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58), Statistics(X) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42), Statistics(X) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ ScanTransformer parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46), Statistics(X) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83), Statistics(X) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ ScanTransformer parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57), Statistics(X) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42), Statistics(X) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ ScanTransformer parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46), Statistics(X) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82), Statistics(X) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ ScanTransformer parquet (76) +- == Initial Plan == - Sort (142) - +- Exchange (141) - +- Project (140) - +- SortMergeJoin Inner (139) - :- Sort (133) - : +- Exchange (132) - : +- Project (131) - : +- SortMergeJoin LeftSemi (130) - : :- Sort (97) - : : +- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Sort (129) - : +- Exchange (128) - : +- Project (127) - : +- SortMergeJoin Inner (126) - : :- Sort (109) - : : +- Exchange (108) - : : +- SortMergeJoin LeftSemi (107) - : : :- Sort (101) - : : : +- Exchange (100) - : : : +- Filter (99) - : : : +- Scan parquet (98) - : : +- Sort (106) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Sort (125) - : +- Exchange (124) - : +- Filter (123) - : +- HashAggregate (122) - : +- HashAggregate (121) - : +- SortMergeJoin LeftSemi (120) - : :- Sort (114) - : : +- Exchange (113) - : : +- Project (112) - : : +- Filter (111) - : : +- Scan parquet (110) - : +- Sort (119) - : +- Exchange (118) - : +- Project (117) - : +- Filter (116) - : +- Scan parquet (115) - +- Sort (138) - +- Exchange (137) - +- Project (136) - +- Filter (135) - +- Scan parquet (134) + Sort (141) + +- Exchange (140) + +- Project (139) + +- SortMergeJoin Inner (138) + :- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin LeftSemi (129) + : :- Sort (96) + : : +- Exchange (95) + : : +- Filter (94) + : : +- Scan parquet (93) + : +- Sort (128) + : +- Exchange (127) + : +- Project (126) + : +- SortMergeJoin Inner (125) + : :- Sort (108) + : : +- Exchange (107) + : : +- SortMergeJoin LeftSemi (106) + : : :- Sort (100) + : : : +- Exchange (99) + : : : +- Filter (98) + : : : +- Scan parquet (97) + : : +- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (124) + : +- Exchange (123) + : +- Filter (122) + : +- HashAggregate (121) + : +- HashAggregate (120) + : +- SortMergeJoin LeftSemi (119) + : :- Sort (113) + : : +- Exchange (112) + : : +- Project (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (118) + : +- Exchange (117) + : +- Project (116) + : +- Filter (115) + : +- Scan parquet (114) + +- Sort (137) + +- Exchange (136) + +- Project (135) + +- Filter (134) + +- Scan parquet (133) (1) ScanTransformer parquet @@ -326,407 +325,400 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) ScanTransformer parquet +(76) ScanTransformer parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) AQEShuffleRead +(91) AQEShuffleRead Input [2]: [s_name#X, s_address#X] Arguments: local -(93) VeloxColumnarToRow +(92) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(94) Scan parquet +(93) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(95) Filter +(94) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(96) Exchange +(95) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Sort +(96) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(98) Scan parquet +(97) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(99) Filter +(98) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(100) Exchange +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Sort +(100) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(102) Scan parquet +(101) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(103) Filter +(102) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(104) Project +(103) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(105) Exchange +(104) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(105) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(107) SortMergeJoin +(106) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(107) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(110) Scan parquet +(109) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(111) Filter +(110) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(112) Project +(111) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(113) Exchange +(112) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Sort +(113) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(115) Scan parquet +(114) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(116) Filter +(115) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(117) Project +(116) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(118) Exchange +(117) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Sort +(118) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(120) SortMergeJoin +(119) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(121) HashAggregate +(120) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(122) HashAggregate +(121) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(123) Filter +(122) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(124) Exchange +(123) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(124) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(126) SortMergeJoin +(125) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(127) Project +(126) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(128) Exchange +(127) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Sort +(128) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(130) SortMergeJoin +(129) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(131) Project +(130) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(132) Exchange +(131) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(132) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(134) Scan parquet +(133) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(135) Filter +(134) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(136) Project +(135) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(137) Exchange +(136) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(137) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(139) SortMergeJoin +(138) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(140) Project +(139) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(141) Exchange +(140) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Sort +(141) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(143) AdaptiveSparkPlan +(142) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt index f6dc9c8015ea..d09fe3e19a9b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24), Statistics(X) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33), Statistics(X) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ ScanTransformer parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24), Statistics(X) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ ScanTransformer parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) ScanTransformer parquet @@ -215,141 +214,134 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt index 5278039ec385..e2d5e58eba47 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -428,55 +428,54 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (135) +- == Final Plan == - VeloxColumnarToRow (114) - +- ^ ProjectExecTransformer (112) - +- ^ RegularHashAggregateExecTransformer (111) - +- ^ RegularHashAggregateExecTransformer (110) - +- ^ ProjectExecTransformer (109) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) - :- ^ InputIteratorTransformer (103) - : +- ShuffleQueryStage (101), Statistics(X) - : +- ColumnarExchange (100) - : +- VeloxResizeBatches (99) - : +- ^ ProjectExecTransformer (97) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - : :- ^ InputIteratorTransformer (91) - : : +- ShuffleQueryStage (89), Statistics(X) - : : +- ColumnarExchange (88) - : : +- VeloxResizeBatches (87) - : : +- ^ ProjectExecTransformer (85) - : : +- ^ FilterExecTransformer (84) - : : +- ^ ScanTransformer parquet (83) - : +- ^ InputIteratorTransformer (95) - : +- ShuffleQueryStage (93), Statistics(X) - : +- ReusedExchange (92) - +- ^ InputIteratorTransformer (107) - +- ShuffleQueryStage (105), Statistics(X) - +- ReusedExchange (104) + VeloxColumnarToRow (113) + +- ^ ProjectExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxResizeBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxResizeBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ ScanTransformer parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (135) - +- HashAggregate (134) - +- Project (133) - +- SortMergeJoin Inner (132) - :- Sort (126) - : +- Exchange (125) - : +- Project (124) - : +- SortMergeJoin Inner (123) - : :- Sort (118) - : : +- Exchange (117) - : : +- Filter (116) - : : +- Scan parquet (115) - : +- Sort (122) - : +- Exchange (121) - : +- Filter (120) - : +- Scan parquet (119) - +- Sort (131) - +- Exchange (130) - +- Project (129) - +- Filter (128) - +- Scan parquet (127) + HashAggregate (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (117) + : : +- Exchange (116) + : : +- Filter (115) + : : +- Scan parquet (114) + : +- Sort (121) + : +- Exchange (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (83) ScanTransformer parquet @@ -587,131 +586,124 @@ Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] (110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(111) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(112) ProjectExecTransformer +(111) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(113) WholeStageCodegenTransformer (X) +(112) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(114) VeloxColumnarToRow +(113) VeloxColumnarToRow Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(115) Scan parquet +(114) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(116) Filter +(115) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(117) Exchange +(116) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Sort +(117) Sort Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(119) Scan parquet +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(120) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(121) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(124) Project +(123) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(125) Exchange +(124) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) Sort +(125) Sort Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(127) Scan parquet +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(128) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(129) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(130) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) Sort +(130) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(132) SortMergeJoin +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(132) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(134) HashAggregate +(133) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(135) HashAggregate +(134) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(136) AdaptiveSparkPlan +(135) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt index ca06d654b432..59abfb682c42 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35), Statistics(X) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6), Statistics(X) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15), Statistics(X) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ ScanTransformer parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34), Statistics(X) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ ScanTransformer parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) ScanTransformer parquet @@ -134,181 +133,174 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt index c61d21b92ed1..531cc5aaab55 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) ScanTransformer parquet @@ -124,94 +123,87 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt index 3585c0cc2f34..f370146f9206 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -259,31 +259,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (73) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRow (64) - +- ^ RegularHashAggregateExecTransformer (62) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ ProjectExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ InputIteratorTransformer (58) - +- ShuffleQueryStage (56), Statistics(X) - +- ColumnarExchange (55) - +- VeloxResizeBatches (54) - +- ^ ProjectExecTransformer (52) - +- ^ FlushableHashAggregateExecTransformer (51) - +- ^ ProjectExecTransformer (50) - +- ^ FilterExecTransformer (49) - +- ^ ScanTransformer parquet (48) + VeloxColumnarToRow (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxResizeBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ ScanTransformer parquet (48) +- == Initial Plan == - HashAggregate (72) - +- HashAggregate (71) - +- HashAggregate (70) - +- Exchange (69) - +- HashAggregate (68) - +- Project (67) - +- Filter (66) - +- Scan parquet (65) + HashAggregate (71) + +- HashAggregate (70) + +- HashAggregate (69) + +- Exchange (68) + +- HashAggregate (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (48) ScanTransformer parquet @@ -348,71 +347,64 @@ Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] (61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(62) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(64) VeloxColumnarToRow +(63) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(65) Scan parquet +(64) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(66) Filter +(65) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(67) Project +(66) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(68) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(69) Exchange +(68) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) HashAggregate +(69) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(71) HashAggregate +(70) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(72) HashAggregate +(71) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(73) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt index c6d0e0103363..c1b43b7fc421 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ ScanTransformer parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ ScanTransformer parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) ScanTransformer parquet @@ -215,141 +214,134 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt index 50a076b1ef8f..c1a6b181bb34 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44), Statistics(X) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36), Statistics(X) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16), Statistics(X) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ ScanTransformer parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25), Statistics(X) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ ScanTransformer parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53), Statistics(X) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ ScanTransformer parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57), Statistics(X) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44), Statistics(X) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16), Statistics(X) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ ScanTransformer parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25), Statistics(X) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ ScanTransformer parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53), Statistics(X) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ScanTransformer parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57), Statistics(X) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) ScanTransformer parquet @@ -375,223 +374,216 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt index fb7e231fa5b3..4db9ca0c8393 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) ScanTransformer parquet @@ -123,90 +122,83 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt index 4e427961a4f6..4d8eee1f1fb7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -1,19 +1,19 @@ == Physical Plan == -AdaptiveSparkPlan (143) +AdaptiveSparkPlan (142) +- == Final Plan == - VeloxColumnarToRow (93) - +- AQEShuffleRead (92) - +- ShuffleQueryStage (91), Statistics(X) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74), Statistics(X) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (92) + +- AQEShuffleRead (91) + +- ShuffleQueryStage (90), Statistics(X) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73), Statistics(X) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7), Statistics(X) : : +- ColumnarExchange (6) @@ -21,12 +21,12 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66), Statistics(X) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65), Statistics(X) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33), Statistics(X) : : +- ColumnarExchange (32) @@ -47,83 +47,82 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ ScanTransformer parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58), Statistics(X) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42), Statistics(X) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ ScanTransformer parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46), Statistics(X) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83), Statistics(X) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ ScanTransformer parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57), Statistics(X) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42), Statistics(X) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ ScanTransformer parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46), Statistics(X) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82), Statistics(X) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ ScanTransformer parquet (76) +- == Initial Plan == - Sort (142) - +- Exchange (141) - +- Project (140) - +- SortMergeJoin Inner (139) - :- Sort (133) - : +- Exchange (132) - : +- Project (131) - : +- SortMergeJoin LeftSemi (130) - : :- Sort (97) - : : +- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Sort (129) - : +- Exchange (128) - : +- Project (127) - : +- SortMergeJoin Inner (126) - : :- Sort (109) - : : +- Exchange (108) - : : +- SortMergeJoin LeftSemi (107) - : : :- Sort (101) - : : : +- Exchange (100) - : : : +- Filter (99) - : : : +- Scan parquet (98) - : : +- Sort (106) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Sort (125) - : +- Exchange (124) - : +- Filter (123) - : +- HashAggregate (122) - : +- HashAggregate (121) - : +- SortMergeJoin LeftSemi (120) - : :- Sort (114) - : : +- Exchange (113) - : : +- Project (112) - : : +- Filter (111) - : : +- Scan parquet (110) - : +- Sort (119) - : +- Exchange (118) - : +- Project (117) - : +- Filter (116) - : +- Scan parquet (115) - +- Sort (138) - +- Exchange (137) - +- Project (136) - +- Filter (135) - +- Scan parquet (134) + Sort (141) + +- Exchange (140) + +- Project (139) + +- SortMergeJoin Inner (138) + :- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin LeftSemi (129) + : :- Sort (96) + : : +- Exchange (95) + : : +- Filter (94) + : : +- Scan parquet (93) + : +- Sort (128) + : +- Exchange (127) + : +- Project (126) + : +- SortMergeJoin Inner (125) + : :- Sort (108) + : : +- Exchange (107) + : : +- SortMergeJoin LeftSemi (106) + : : :- Sort (100) + : : : +- Exchange (99) + : : : +- Filter (98) + : : : +- Scan parquet (97) + : : +- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (124) + : +- Exchange (123) + : +- Filter (122) + : +- HashAggregate (121) + : +- HashAggregate (120) + : +- SortMergeJoin LeftSemi (119) + : :- Sort (113) + : : +- Exchange (112) + : : +- Project (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (118) + : +- Exchange (117) + : +- Project (116) + : +- Filter (115) + : +- Scan parquet (114) + +- Sort (137) + +- Exchange (136) + +- Project (135) + +- Filter (134) + +- Scan parquet (133) (1) ScanTransformer parquet @@ -328,415 +327,408 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) ScanTransformer parquet +(76) ScanTransformer parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) AQEShuffleRead +(91) AQEShuffleRead Input [2]: [s_name#X, s_address#X] Arguments: local -(93) VeloxColumnarToRow +(92) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(94) Scan parquet +(93) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(95) Filter +(94) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(96) Exchange +(95) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Sort +(96) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(98) Scan parquet +(97) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(99) Filter +(98) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(100) Exchange +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Sort +(100) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(102) Scan parquet +(101) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(103) Filter +(102) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(104) Project +(103) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(105) Exchange +(104) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(105) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(107) SortMergeJoin +(106) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(108) Exchange +(107) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(110) Scan parquet +(109) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(111) Filter +(110) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(112) Project +(111) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(113) Exchange +(112) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Sort +(113) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(115) Scan parquet +(114) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(116) Filter +(115) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(117) Project +(116) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(118) Exchange +(117) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Sort +(118) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(120) SortMergeJoin +(119) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(121) HashAggregate +(120) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(122) HashAggregate +(121) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(123) Filter +(122) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(124) Exchange +(123) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(124) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(126) SortMergeJoin +(125) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(127) Project +(126) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(128) Exchange +(127) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Sort +(128) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(130) SortMergeJoin +(129) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(131) Project +(130) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(132) Exchange +(131) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(132) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(134) Scan parquet +(133) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(135) Filter +(134) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(136) Project +(135) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(137) Exchange +(136) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(137) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(139) SortMergeJoin +(138) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(140) Project +(139) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(141) Exchange +(140) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Sort +(141) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(143) AdaptiveSparkPlan +(142) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt index 4aa745832681..1f86369a62af 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24), Statistics(X) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33), Statistics(X) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ ScanTransformer parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24), Statistics(X) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ ScanTransformer parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) ScanTransformer parquet @@ -217,143 +216,136 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 84d1fe8db919..cf0bf272bc5b 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -49,6 +49,7 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu .set("spark.unsafe.exceptionOnMemoryLeak", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") .set("spark.sql.sources.useV1SourceList", "avro") + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "false") } test("count") { diff --git a/docs/Configuration.md b/docs/Configuration.md index d4ca9c10fb76..0c44341c363c 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -59,6 +59,7 @@ You can add these configurations into spark-defaults.conf to enable or disable t | spark.gluten.sql.columnar.fallback.ignoreRowToColumnar | When true, the fallback policy ignores the RowToColumnar when counting fallback number. | true | | spark.gluten.sql.columnar.fallback.preferColumnar | When true, the fallback policy prefers to use Gluten plan rather than vanilla Spark plan if the both of them contains ColumnarToRow and the vanilla Spark plan ColumnarToRow number is not smaller than Gluten plan. | true | | spark.gluten.sql.columnar.force.hashagg | Force to use hash agg to replace sort agg. | true | +| spark.gluten.sql.mergeTwoPhasesAggregate.enabled | Whether to merge two phases aggregate if there are no other operators between them. | true | | spark.gluten.sql.columnar.vanillaReaders | Enable vanilla spark's vectorized reader. Please note it may bring perf. overhead due to extra data transition. We recommend to disable it if most queries can be fully offloaded to gluten. | false | | spark.gluten.sql.native.bloomFilter | Enable or Disable native runtime bloom filter. | true | | spark.gluten.sql.native.arrow.reader.enabled | Enable or Disable native arrow read CSV file format | false | diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala similarity index 96% rename from backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala rename to gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala index a10659b6d5e7..a034a3229a88 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.extension +package org.apache.gluten.extension.columnar import org.apache.gluten.GlutenConfig @@ -42,6 +42,7 @@ case class MergeTwoPhasesHashBaseAggregate(session: SparkSession) val scanOnly: Boolean = glutenConf.enableScanOnly val enableColumnarHashAgg: Boolean = !scanOnly && glutenConf.enableColumnarHashAgg val replaceSortAggWithHashAgg: Boolean = GlutenConfig.getConf.forceToUseHashAgg + val mergeTwoPhasesAggEnabled: Boolean = GlutenConfig.getConf.mergeTwoPhasesAggEnabled private def isPartialAgg(partialAgg: BaseAggregateExec, finalAgg: BaseAggregateExec): Boolean = { // TODO: now it can not support to merge agg which there are the filters in the aggregate exprs. @@ -59,7 +60,7 @@ case class MergeTwoPhasesHashBaseAggregate(session: SparkSession) } override def apply(plan: SparkPlan): SparkPlan = { - if (!enableColumnarHashAgg) { + if (!mergeTwoPhasesAggEnabled || !enableColumnarHashAgg) { plan } else { plan.transformDown { diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index f394b4687d3d..33bf1a1ec97e 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution import org.apache.gluten.execution.HashAggregateExecBaseTransformer -import org.apache.gluten.utils.BackendTestUtils import org.apache.spark.sql.{DataFrame, GlutenSQLTestsBaseTrait} import org.apache.spark.sql.execution.aggregate.{ObjectHashAggregateExec, SortAggregateExec} @@ -85,7 +84,7 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + Seq(("COUNT", 1, 0, 1, 0), ("COLLECT_LIST", 1, 0, 1, 0)).foreach { aggExprInfo => val query = s""" @@ -99,11 +98,7 @@ class GlutenReplaceHashWithSortAggSuite |) |GROUP BY key """.stripMargin - if (BackendTestUtils.isCHBackendLoaded()) { - checkAggs(query, 1, 0, 1, 0) - } else { - checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) - } + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 332c21418a9b..33bf1a1ec97e 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -84,7 +84,7 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + Seq(("COUNT", 1, 0, 1, 0), ("COLLECT_LIST", 1, 0, 1, 0)).foreach { aggExprInfo => val query = s""" diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 332c21418a9b..29ccd9b71dcd 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -81,24 +81,27 @@ class GlutenReplaceHashWithSortAggSuite } testGluten("replace partial and final hash aggregate together with sort aggregate") { - withTempView("t1", "t2") { - spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") - spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { - aggExprInfo => - val query = - s""" - |SELECT key, ${aggExprInfo._1}(key) - |FROM - |( - | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key - | FROM t1 - | JOIN t2 - | ON t1.key = t2.key - |) - |GROUP BY key + // TODO: Should can merge aggregates even if RAS support. + withSQLConf("spark.gluten.ras.enabled" -> "false") { + withTempView("t1", "t2") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") + spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") + Seq(("COUNT", 1, 0, 1, 0), ("COLLECT_LIST", 1, 0, 1, 0)).foreach { + aggExprInfo => + val query = + s""" + |SELECT key, ${aggExprInfo._1}(key) + |FROM + |( + | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key + | FROM t1 + | JOIN t2 + | ON t1.key = t2.key + |) + |GROUP BY key """.stripMargin - checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) + } } } } diff --git a/gluten-ut/test/src/test/scala/org/apache/gluten/execution/MergeTwoPhasesHashBaseAggregateSuite.scala b/gluten-ut/test/src/test/scala/org/apache/gluten/execution/MergeTwoPhasesHashBaseAggregateSuite.scala new file mode 100644 index 000000000000..f65c71f77cf5 --- /dev/null +++ b/gluten-ut/test/src/test/scala/org/apache/gluten/execution/MergeTwoPhasesHashBaseAggregateSuite.scala @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.execution + +import org.apache.gluten.GlutenConfig +import org.apache.gluten.utils.{BackendTestUtils, SystemParameters} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.adaptive.{DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} + +abstract class BaseMergeTwoPhasesHashBaseAggregateSuite extends WholeStageTransformerSuite { + val fileFormat: String = "parquet" + override protected val resourcePath: String = "/tpch-data-parquet" + + override def beforeAll(): Unit = { + super.beforeAll() + + spark + .sql(s""" + |CREATE TABLE t1 (id int, age int, phone int, date string) + |USING $fileFormat + |PARTITIONED BY (date) + |""".stripMargin) + .show() + + spark + .sql(s""" + |INSERT INTO t1 PARTITION(date = '2020-01-01') + |SELECT id, id % 10 as age, id % 10 as phone + |FROM range(100) + |""".stripMargin) + .show() + } + + override protected def sparkConf: SparkConf = { + val conf = super.sparkConf + .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + .set("spark.sql.files.maxPartitionBytes", "1g") + .set("spark.sql.shuffle.partitions", "1") + .set("spark.memory.offHeap.size", "2g") + .set("spark.gluten.ras.enabled", "false") + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "true") + if (BackendTestUtils.isCHBackendLoaded()) { + conf + .set("spark.gluten.sql.enable.native.validation", "false") + .set(GlutenConfig.GLUTEN_LIB_PATH, SystemParameters.getClickHouseLibPath) + } + conf + } + + override def afterAll(): Unit = { + try { + sql("DROP TABLE IF EXISTS t1") + } finally { + super.afterAll() + } + } + + test("Merge two phase hash-based aggregate into one aggregate") { + def checkHashAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange hash aggregate, merge to one hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT key, count(key) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT key, count(key) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT count(1) FROM t1 + |""".stripMargin), + 2) + } + + test("Merge two phase object-based aggregate into one aggregate") { + def checkObjectAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange object aggregate, merge to one hash aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT key, collect_list(key) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter object aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT key, collect_list(key) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange object aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT collect_list(id) FROM t1 + |""".stripMargin), + 2) + } + + test("Merge two phase sort-based aggregate into one aggregate") { + def checkSortAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withSQLConf("spark.sql.test.forceApplySortAggregate" -> "true") { + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange sort aggregate, merge to one hash aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT sum(if(key<0,0,key)) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter sort aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT key, sum(if(key<0,0,key)) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange sort aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT sum(if(id<0,0,id)) FROM t1 + |""".stripMargin), + 2) + } + } +} + +class MergeTwoPhasesAggregateSuiteAEOn + extends BaseMergeTwoPhasesHashBaseAggregateSuite + with EnableAdaptiveExecutionSuite + +class MergeTwoPhasesAggregateSuiteAEOff + extends BaseMergeTwoPhasesHashBaseAggregateSuite + with DisableAdaptiveExecutionSuite diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index 107c33a241fd..c46d87469036 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -59,6 +59,8 @@ class GlutenConfig(conf: SQLConf) extends Logging { def forceToUseHashAgg: Boolean = conf.getConf(COLUMNAR_FORCE_HASHAGG_ENABLED) + def mergeTwoPhasesAggEnabled: Boolean = conf.getConf(MERGE_TWO_PHASES_ENABLED) + def enableColumnarProject: Boolean = conf.getConf(COLUMNAR_PROJECT_ENABLED) def enableColumnarFilter: Boolean = conf.getConf(COLUMNAR_FILTER_ENABLED) @@ -898,6 +900,13 @@ object GlutenConfig { .booleanConf .createWithDefault(true) + val MERGE_TWO_PHASES_ENABLED = + buildConf("spark.gluten.sql.mergeTwoPhasesAggregate.enabled") + .internal() + .doc("Whether to merge two phases aggregate if there are no other operators between them.") + .booleanConf + .createWithDefault(true) + val COLUMNAR_PROJECT_ENABLED = buildConf("spark.gluten.sql.columnar.project") .internal()